diff --git a/NEWS.md b/NEWS.md
index 5adedd6f..feb2636c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # rsample (development version)
 
+* The lag argument for `initial_time_split()` has been soft deprecated (@bjornkallerud, #592).
+
 # rsample 1.3.1
 
 * The new `internal_calibration_split()` function and its methods for various resamples is for usage in tune to create a internal split of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483, #488, #489, #569, #575,  #577, #582).
diff --git a/R/initial_split.R b/R/initial_split.R
index d8626b4f..fe8af48d 100644
--- a/R/initial_split.R
+++ b/R/initial_split.R
@@ -9,6 +9,9 @@
 #'
 #' @details `training()` and `testing()` are used to extract the resulting data.
 #'
+#' To avoid data leakage when using lagged variables, lag the predictors before
+#' the initial split.
+#'
 #' @template strata_details
 #' @inheritParams vfold_cv
 #' @inheritParams make_strata
@@ -26,13 +29,7 @@
 #' drinks_split <- initial_time_split(drinks)
 #' train_data <- training(drinks_split)
 #' test_data <- testing(drinks_split)
-#' c(max(train_data$date), min(test_data$date)) # no lag
-#'
-#' # With 12 period lag
-#' drinks_lag_split <- initial_time_split(drinks, lag = 12)
-#' train_data <- training(drinks_lag_split)
-#' test_data <- testing(drinks_lag_split)
-#' c(max(train_data$date), min(test_data$date)) # 12 period lag
+#' c(max(train_data$date), min(test_data$date))
 #'
 #' set.seed(1353)
 #' car_split <- group_initial_split(mtcars, cyl)
@@ -76,14 +73,28 @@ initial_split <- function(
 }
 
 #' @rdname initial_split
-#' @param lag A value to include a lag between the assessment
-#'  and analysis set. This is useful if lagged predictors will be used
-#'  during training and testing.
+#' @param lag `r lifecycle::badge("deprecated")` This is deprecated, please lag
+#' your predictors prior to splitting the dataset.
 #' @export
-initial_time_split <- function(data, prop = 3 / 4, lag = 0, ...) {
+initial_time_split <- function(
+  data,
+  prop = 3 / 4,
+  lag = lifecycle::deprecated(),
+  ...
+) {
   check_dots_empty()
   check_prop(prop)
 
+  if (lifecycle::is_present(lag)) {
+    lifecycle::deprecate_soft(
+      when = "1.3.1.9000",
+      what = "initial_time_split(lag)",
+      details = "Please lag your predictors prior to splitting the dataset."
+    )
+  } else {
+    lag <- 0
+  }
+
   if (!is.numeric(lag) | !(lag %% 1 == 0)) {
     cli_abort("{.arg lag} must be a whole number.")
   }
diff --git a/R/internal_calibration_split.R b/R/internal_calibration_split.R
index 75cf6014..aee9ee20 100644
--- a/R/internal_calibration_split.R
+++ b/R/internal_calibration_split.R
@@ -669,6 +669,11 @@ internal_calibration_split.initial_time_split <- function(x, split_args, ...) {
 
   training_set <- training(x)
 
+  # to avoid deprecation warning if lag is just the default of 0
+  if (-1 < split_args$lag && split_args$lag < 1) {
+    split_args$lag <- NULL
+  }
+
   split_cal <- internal_calibration_split_core(
     training_set,
     split_function = initial_time_split,
diff --git a/R/validation_split.R b/R/validation_split.R
index 3b5af308..6c0b7f32 100644
--- a/R/validation_split.R
+++ b/R/validation_split.R
@@ -92,7 +92,9 @@ validation_split <- function(
   split_objs$splits <- map(split_objs$splits, rm_out)
   class(split_objs$splits[[1]]) <- c("val_split", "rsplit")
 
-  if (!is.null(strata)) names(strata) <- NULL
+  if (!is.null(strata)) {
+    names(strata) <- NULL
+  }
   val_att <- list(
     prop = prop,
     strata = strata,
@@ -111,6 +113,9 @@ validation_split <- function(
 #' @rdname validation_split
 #' @inheritParams vfold_cv
 #' @inheritParams initial_time_split
+#' @param lag A value to include a lag between the assessment and analysis set.
+#' This is useful if lagged predictors will be used during training and
+#' testing.
 #' @export
 validation_time_split <- function(data, prop = 3 / 4, lag = 0, ...) {
   lifecycle::deprecate_warn(
@@ -195,7 +200,9 @@ group_validation_split <- function(
   class(split_objs$splits[[1]]) <- c("group_val_split", "val_split", "rsplit")
 
   # This is needed for printing -- strata cannot be missing
-  if (is.null(strata)) strata <- FALSE
+  if (is.null(strata)) {
+    strata <- FALSE
+  }
   val_att <- list(
     prop = prop,
     group = group,
diff --git a/man/initial_split.Rd b/man/initial_split.Rd
index 07fc96b6..d339e305 100644
--- a/man/initial_split.Rd
+++ b/man/initial_split.Rd
@@ -14,7 +14,7 @@
 \usage{
 initial_split(data, prop = 3/4, strata = NULL, breaks = 4, pool = 0.1, ...)
 
-initial_time_split(data, prop = 3/4, lag = 0, ...)
+initial_time_split(data, prop = 3/4, lag = lifecycle::deprecated(), ...)
 
 training(x, ...)
 
@@ -49,9 +49,8 @@ of stratifying groups that are too small.}
 
 \item{...}{These dots are for future extensions and must be empty.}
 
-\item{lag}{A value to include a lag between the assessment
-and analysis set. This is useful if lagged predictors will be used
-during training and testing.}
+\item{lag}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This is deprecated, please lag
+your predictors prior to splitting the dataset.}
 
 \item{x}{An \code{rsplit} object produced by \code{initial_split()} or
 \code{initial_time_split()}.}
@@ -75,6 +74,9 @@ the same split.
 \details{
 \code{training()} and \code{testing()} are used to extract the resulting data.
 
+To avoid data leakage when using lagged variables, lag the predictors before
+the initial split.
+
 With a \code{strata} argument, the random sampling is conducted
 \emph{within the stratification variable}. This can help ensure that the
 resamples have equivalent proportions as the original data set. For
@@ -94,13 +96,7 @@ data(drinks, package = "modeldata")
 drinks_split <- initial_time_split(drinks)
 train_data <- training(drinks_split)
 test_data <- testing(drinks_split)
-c(max(train_data$date), min(test_data$date)) # no lag
-
-# With 12 period lag
-drinks_lag_split <- initial_time_split(drinks, lag = 12)
-train_data <- training(drinks_lag_split)
-test_data <- testing(drinks_lag_split)
-c(max(train_data$date), min(test_data$date)) # 12 period lag
+c(max(train_data$date), min(test_data$date))
 
 set.seed(1353)
 car_split <- group_initial_split(mtcars, cyl)
diff --git a/man/validation_split.Rd b/man/validation_split.Rd
index a8577041..d72118db 100644
--- a/man/validation_split.Rd
+++ b/man/validation_split.Rd
@@ -31,9 +31,9 @@ of stratifying groups that are too small.}
 
 \item{...}{These dots are for future extensions and must be empty.}
 
-\item{lag}{A value to include a lag between the assessment
-and analysis set. This is useful if lagged predictors will be used
-during training and testing.}
+\item{lag}{A value to include a lag between the assessment and analysis set.
+This is useful if lagged predictors will be used during training and
+testing.}
 
 \item{group}{A variable in \code{data} (single character or name) used for
 grouping observations with the same value to either the analysis or
diff --git a/tests/testthat/_snaps/initial_split.md b/tests/testthat/_snaps/initial_split.md
index 37bb892a..94b26ca6 100644
--- a/tests/testthat/_snaps/initial_split.md
+++ b/tests/testthat/_snaps/initial_split.md
@@ -1,3 +1,15 @@
+# `lag` arg to `initial_time_split()` is deprecated
+
+    Code
+      initial_time_split(mtcars, lag = 2)
+    Condition
+      Warning:
+      The `lag` argument of `initial_time_split()` is deprecated as of rsample 1.3.1.9000.
+      i Please lag your predictors prior to splitting the dataset.
+    Output
+      <Training/Testing/Total>
+      <24/10/32>
+
 # `initial_time_split()` error messages
 
     Code
diff --git a/tests/testthat/test-initial_split.R b/tests/testthat/test-initial_split.R
index 476be1ab..49bbe269 100644
--- a/tests/testthat/test-initial_split.R
+++ b/tests/testthat/test-initial_split.R
@@ -18,7 +18,15 @@ test_that("default time param", {
   expect_equal(tr1, dplyr::slice(dat1, 1:floor(nrow(dat1) * 3 / 4)))
 })
 
+test_that("`lag` arg to `initial_time_split()` is deprecated", {
+  expect_snapshot({
+    initial_time_split(mtcars, lag = 2)
+  })
+})
+
 test_that("default time param with lag", {
+  withr::local_options(lifecycle_verbosity = "quiet")
+
   rs1 <- initial_time_split(dat1, lag = 5)
   expect_equal(class(rs1), c("initial_time_split", "initial_split", "rsplit"))
   tr1 <- training(rs1)
@@ -41,6 +49,8 @@ test_that("`initial_time_split()` error messages", {
     initial_time_split(drinks, prop = 2)
   })
 
+  withr::local_options(lifecycle_verbosity = "quiet")
+
   expect_snapshot(error = TRUE, {
     initial_time_split(drinks, lag = 12.5)
   })