Skip to content

Commit 4357e11

Browse files
authored
Merge pull request #592 from bjornkallerud/deprecate-lags
Deprecate lag argument in initial_time_split and update NEWS
2 parents 3c8ed56 + 2ebf417 commit 4357e11

File tree

8 files changed

+70
-27
lines changed

8 files changed

+70
-27
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# rsample (development version)
22

3+
* The lag argument for `initial_time_split()` has been soft deprecated (@bjornkallerud, #592).
4+
35
# rsample 1.3.1
46

57
* The new `internal_calibration_split()` function and its methods for various resamples is for usage in tune to create a internal split of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489, #569, #575, #577, #582).

R/initial_split.R

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#'
1010
#' @details `training()` and `testing()` are used to extract the resulting data.
1111
#'
12+
#' To avoid data leakage when using lagged variables, lag the predictors before
13+
#' the initial split.
14+
#'
1215
#' @template strata_details
1316
#' @inheritParams vfold_cv
1417
#' @inheritParams make_strata
@@ -26,13 +29,7 @@
2629
#' drinks_split <- initial_time_split(drinks)
2730
#' train_data <- training(drinks_split)
2831
#' test_data <- testing(drinks_split)
29-
#' c(max(train_data$date), min(test_data$date)) # no lag
30-
#'
31-
#' # With 12 period lag
32-
#' drinks_lag_split <- initial_time_split(drinks, lag = 12)
33-
#' train_data <- training(drinks_lag_split)
34-
#' test_data <- testing(drinks_lag_split)
35-
#' c(max(train_data$date), min(test_data$date)) # 12 period lag
32+
#' c(max(train_data$date), min(test_data$date))
3633
#'
3734
#' set.seed(1353)
3835
#' car_split <- group_initial_split(mtcars, cyl)
@@ -76,14 +73,28 @@ initial_split <- function(
7673
}
7774

7875
#' @rdname initial_split
79-
#' @param lag A value to include a lag between the assessment
80-
#' and analysis set. This is useful if lagged predictors will be used
81-
#' during training and testing.
76+
#' @param lag `r lifecycle::badge("deprecated")` This is deprecated, please lag
77+
#' your predictors prior to splitting the dataset.
8278
#' @export
83-
initial_time_split <- function(data, prop = 3 / 4, lag = 0, ...) {
79+
initial_time_split <- function(
80+
data,
81+
prop = 3 / 4,
82+
lag = lifecycle::deprecated(),
83+
...
84+
) {
8485
check_dots_empty()
8586
check_prop(prop)
8687

88+
if (lifecycle::is_present(lag)) {
89+
lifecycle::deprecate_soft(
90+
when = "1.3.1.9000",
91+
what = "initial_time_split(lag)",
92+
details = "Please lag your predictors prior to splitting the dataset."
93+
)
94+
} else {
95+
lag <- 0
96+
}
97+
8798
if (!is.numeric(lag) | !(lag %% 1 == 0)) {
8899
cli_abort("{.arg lag} must be a whole number.")
89100
}

R/internal_calibration_split.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,11 @@ internal_calibration_split.initial_time_split <- function(x, split_args, ...) {
669669

670670
training_set <- training(x)
671671

672+
# to avoid deprecation warning if lag is just the default of 0
673+
if (-1 < split_args$lag && split_args$lag < 1) {
674+
split_args$lag <- NULL
675+
}
676+
672677
split_cal <- internal_calibration_split_core(
673678
training_set,
674679
split_function = initial_time_split,

R/validation_split.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,9 @@ validation_split <- function(
9292
split_objs$splits <- map(split_objs$splits, rm_out)
9393
class(split_objs$splits[[1]]) <- c("val_split", "rsplit")
9494

95-
if (!is.null(strata)) names(strata) <- NULL
95+
if (!is.null(strata)) {
96+
names(strata) <- NULL
97+
}
9698
val_att <- list(
9799
prop = prop,
98100
strata = strata,
@@ -111,6 +113,9 @@ validation_split <- function(
111113
#' @rdname validation_split
112114
#' @inheritParams vfold_cv
113115
#' @inheritParams initial_time_split
116+
#' @param lag A value to include a lag between the assessment and analysis set.
117+
#' This is useful if lagged predictors will be used during training and
118+
#' testing.
114119
#' @export
115120
validation_time_split <- function(data, prop = 3 / 4, lag = 0, ...) {
116121
lifecycle::deprecate_warn(
@@ -195,7 +200,9 @@ group_validation_split <- function(
195200
class(split_objs$splits[[1]]) <- c("group_val_split", "val_split", "rsplit")
196201

197202
# This is needed for printing -- strata cannot be missing
198-
if (is.null(strata)) strata <- FALSE
203+
if (is.null(strata)) {
204+
strata <- FALSE
205+
}
199206
val_att <- list(
200207
prop = prop,
201208
group = group,

man/initial_split.Rd

Lines changed: 7 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/validation_split.Rd

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/initial_split.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
# `lag` arg to `initial_time_split()` is deprecated
2+
3+
Code
4+
initial_time_split(mtcars, lag = 2)
5+
Condition
6+
Warning:
7+
The `lag` argument of `initial_time_split()` is deprecated as of rsample 1.3.1.9000.
8+
i Please lag your predictors prior to splitting the dataset.
9+
Output
10+
<Training/Testing/Total>
11+
<24/10/32>
12+
113
# `initial_time_split()` error messages
214

315
Code

tests/testthat/test-initial_split.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,15 @@ test_that("default time param", {
1818
expect_equal(tr1, dplyr::slice(dat1, 1:floor(nrow(dat1) * 3 / 4)))
1919
})
2020

21+
test_that("`lag` arg to `initial_time_split()` is deprecated", {
22+
expect_snapshot({
23+
initial_time_split(mtcars, lag = 2)
24+
})
25+
})
26+
2127
test_that("default time param with lag", {
28+
withr::local_options(lifecycle_verbosity = "quiet")
29+
2230
rs1 <- initial_time_split(dat1, lag = 5)
2331
expect_equal(class(rs1), c("initial_time_split", "initial_split", "rsplit"))
2432
tr1 <- training(rs1)
@@ -41,6 +49,8 @@ test_that("`initial_time_split()` error messages", {
4149
initial_time_split(drinks, prop = 2)
4250
})
4351

52+
withr::local_options(lifecycle_verbosity = "quiet")
53+
4454
expect_snapshot(error = TRUE, {
4555
initial_time_split(drinks, lag = 12.5)
4656
})

0 commit comments

Comments
 (0)