Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# rsample (development version)

* The `lag` argument of `initial_time_split()` has been soft-deprecated (@bjornkallerud, #592).

# rsample 1.3.1

* The new `internal_calibration_split()` function and its methods for various resamples are for usage in tune to create an internal split of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489, #569, #575, #577, #582).
Expand Down
33 changes: 22 additions & 11 deletions R/initial_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#'
#' @details `training()` and `testing()` are used to extract the resulting data.
#'
#' To avoid data leakage when using lagged variables, lag the predictors before
#' the initial split.
#'
#' @template strata_details
#' @inheritParams vfold_cv
#' @inheritParams make_strata
Expand All @@ -26,13 +29,7 @@
#' drinks_split <- initial_time_split(drinks)
#' train_data <- training(drinks_split)
#' test_data <- testing(drinks_split)
#' c(max(train_data$date), min(test_data$date)) # no lag
#'
#' # With 12 period lag
#' drinks_lag_split <- initial_time_split(drinks, lag = 12)
#' train_data <- training(drinks_lag_split)
#' test_data <- testing(drinks_lag_split)
#' c(max(train_data$date), min(test_data$date)) # 12 period lag
#' c(max(train_data$date), min(test_data$date))
#'
#' set.seed(1353)
#' car_split <- group_initial_split(mtcars, cyl)
Expand Down Expand Up @@ -76,14 +73,28 @@ initial_split <- function(
}

#' @rdname initial_split
#' @param lag A value to include a lag between the assessment
#' and analysis set. This is useful if lagged predictors will be used
#' during training and testing.
#' @param lag `r lifecycle::badge("deprecated")` This argument is deprecated;
#' please lag your predictors prior to splitting the dataset.
#' @export
initial_time_split <- function(data, prop = 3 / 4, lag = 0, ...) {
initial_time_split <- function(
data,
prop = 3 / 4,
lag = lifecycle::deprecated(),
...
) {
check_dots_empty()
check_prop(prop)

if (lifecycle::is_present(lag)) {
lifecycle::deprecate_soft(
when = "1.3.1.9000",
what = "initial_time_split(lag)",
details = "Please lag your predictors prior to splitting the dataset."
)
} else {
lag <- 0
}

if (!is.numeric(lag) | !(lag %% 1 == 0)) {
cli_abort("{.arg lag} must be a whole number.")
}
Expand Down
5 changes: 5 additions & 0 deletions R/internal_calibration_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,11 @@ internal_calibration_split.initial_time_split <- function(x, split_args, ...) {

training_set <- training(x)

# to avoid deprecation warning if lag is just the default of 0
if (-1 < split_args$lag && split_args$lag < 1) {
split_args$lag <- NULL
}

split_cal <- internal_calibration_split_core(
training_set,
split_function = initial_time_split,
Expand Down
11 changes: 9 additions & 2 deletions R/validation_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ validation_split <- function(
split_objs$splits <- map(split_objs$splits, rm_out)
class(split_objs$splits[[1]]) <- c("val_split", "rsplit")

if (!is.null(strata)) names(strata) <- NULL
if (!is.null(strata)) {
names(strata) <- NULL
}
val_att <- list(
prop = prop,
strata = strata,
Expand All @@ -111,6 +113,9 @@ validation_split <- function(
#' @rdname validation_split
#' @inheritParams vfold_cv
#' @inheritParams initial_time_split
#' @param lag A value to include a lag between the assessment and analysis set.
#' This is useful if lagged predictors will be used during training and
#' testing.
#' @export
validation_time_split <- function(data, prop = 3 / 4, lag = 0, ...) {
lifecycle::deprecate_warn(
Expand Down Expand Up @@ -195,7 +200,9 @@ group_validation_split <- function(
class(split_objs$splits[[1]]) <- c("group_val_split", "val_split", "rsplit")

# This is needed for printing -- strata cannot be missing
if (is.null(strata)) strata <- FALSE
if (is.null(strata)) {
strata <- FALSE
}
val_att <- list(
prop = prop,
group = group,
Expand Down
18 changes: 7 additions & 11 deletions man/initial_split.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/validation_split.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/testthat/_snaps/initial_split.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# `lag` arg to `initial_time_split()` is deprecated

Code
initial_time_split(mtcars, lag = 2)
Condition
Warning:
The `lag` argument of `initial_time_split()` is deprecated as of rsample 1.3.1.9000.
i Please lag your predictors prior to splitting the dataset.
Output
<Training/Testing/Total>
<24/10/32>

# `initial_time_split()` error messages

Code
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-initial_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@ test_that("default time param", {
expect_equal(tr1, dplyr::slice(dat1, 1:floor(nrow(dat1) * 3 / 4)))
})

# Supplying `lag` to `initial_time_split()` should signal the soft-deprecation
# warning; the warning text and the printed split are recorded in the snapshot.
test_that("`lag` arg to `initial_time_split()` is deprecated", {
expect_snapshot({
initial_time_split(mtcars, lag = 2)
})
})

test_that("default time param with lag", {
withr::local_options(lifecycle_verbosity = "quiet")

rs1 <- initial_time_split(dat1, lag = 5)
expect_equal(class(rs1), c("initial_time_split", "initial_split", "rsplit"))
tr1 <- training(rs1)
Expand All @@ -41,6 +49,8 @@ test_that("`initial_time_split()` error messages", {
initial_time_split(drinks, prop = 2)
})

withr::local_options(lifecycle_verbosity = "quiet")

expect_snapshot(error = TRUE, {
initial_time_split(drinks, lag = 12.5)
})
Expand Down