Skip to content

Commit 1bb4143

Browse files
vitaliili-db authored and cloud-fan committed
[SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS and offset > rowCount
### What changes were proposed in this pull request? This fixes the failure that occurs when a function that uses `FrameLessOffsetWindowFunctionFrame` is called with `ignoreNulls = true` and `offset > rowCount`, e.g. ``` select x, lead(x, 5) IGNORE NULLS over (order by x) from (select explode(sequence(1, 3)) x) ``` ### Why are the changes needed? To fix an existing bug. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Modified an existing unit test to cover this case. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43236 from vitaliili-db/SPARK-45430. Authored-by: Vitalii Li <[email protected]> Signed-off-by: Wenchen Fan <[email protected]> (cherry picked from commit 32e1e58) Signed-off-by: Wenchen Fan <[email protected]>
1 parent 746f936 commit 1bb4143

File tree

2 files changed

+27
-19
lines changed

2 files changed

+27
-19
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,11 @@ class FrameLessOffsetWindowFunctionFrame(
201201
override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
202202
resetStates(rows)
203203
if (ignoreNulls) {
204-
findNextRowWithNonNullInput()
204+
if (Math.abs(offset) > rows.length) {
205+
fillDefaultValue(EmptyRow)
206+
} else {
207+
findNextRowWithNonNullInput()
208+
}
205209
} else {
206210
// drain the first few rows if offset is larger than zero
207211
while (inputIndex < offset) {

sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -814,34 +814,38 @@ class DataFrameWindowFunctionsSuite extends QueryTest
814814
lead($"value", 1, null, true).over(window),
815815
lead($"value", 2, null, true).over(window),
816816
lead($"value", 3, null, true).over(window),
817+
// offset > rowCount: SPARK-45430
818+
lead($"value", 100, null, true).over(window),
817819
lead(concat($"value", $"key"), 1, null, true).over(window),
818820
lag($"value", 1).over(window),
819821
lag($"value", 2).over(window),
820822
lag($"value", 0, null, true).over(window),
821823
lag($"value", 1, null, true).over(window),
822824
lag($"value", 2, null, true).over(window),
823825
lag($"value", 3, null, true).over(window),
826+
// abs(offset) > rowCount: SPARK-45430
827+
lag($"value", -100, null, true).over(window),
824828
lag(concat($"value", $"key"), 1, null, true).over(window))
825829
.orderBy($"order"),
826830
Seq(
827-
Row("a", 0, null, "x", null, null, "x", "y", "z", "xa",
828-
null, null, null, null, null, null, null),
829-
Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya",
830-
null, null, "x", null, null, null, null),
831-
Row("b", 2, null, null, "y", null, "y", "z", "v", "ya",
832-
"x", null, null, "x", null, null, "xa"),
833-
Row("c", 3, null, "y", null, null, "y", "z", "v", "ya",
834-
null, "x", null, "x", null, null, "xa"),
835-
Row("a", 4, "y", null, "z", "y", "z", "v", null, "za",
836-
null, null, "y", "x", null, null, "xa"),
837-
Row("b", 5, null, "z", "v", null, "z", "v", null, "za",
838-
"y", null, null, "y", "x", null, "ya"),
839-
Row("a", 6, "z", "v", null, "z", "v", null, null, "va",
840-
null, "y", "z", "y", "x", null, "ya"),
841-
Row("a", 7, "v", null, null, "v", null, null, null, null,
842-
"z", null, "v", "z", "y", "x", "za"),
843-
Row("a", 8, null, null, null, null, null, null, null, null,
844-
"v", "z", null, "v", "z", "y", "va")))
831+
Row("a", 0, null, "x", null, null, "x", "y", "z", null, "xa",
832+
null, null, null, null, null, null, null, null),
833+
Row("a", 1, "x", null, null, "x", "y", "z", "v", null, "ya",
834+
null, null, "x", null, null, null, null, null),
835+
Row("b", 2, null, null, "y", null, "y", "z", "v", null, "ya",
836+
"x", null, null, "x", null, null, null, "xa"),
837+
Row("c", 3, null, "y", null, null, "y", "z", "v", null, "ya",
838+
null, "x", null, "x", null, null, null, "xa"),
839+
Row("a", 4, "y", null, "z", "y", "z", "v", null, null, "za",
840+
null, null, "y", "x", null, null, null, "xa"),
841+
Row("b", 5, null, "z", "v", null, "z", "v", null, null, "za",
842+
"y", null, null, "y", "x", null, null, "ya"),
843+
Row("a", 6, "z", "v", null, "z", "v", null, null, null, "va",
844+
null, "y", "z", "y", "x", null, null, "ya"),
845+
Row("a", 7, "v", null, null, "v", null, null, null, null, null,
846+
"z", null, "v", "z", "y", "x", null, "za"),
847+
Row("a", 8, null, null, null, null, null, null, null, null, null,
848+
"v", "z", null, "v", "z", "y", null, "va")))
845849
}
846850

847851
test("SPARK-12989 ExtractWindowExpressions treats alias as regular attribute") {

0 commit comments

Comments
 (0)