This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new ed902cf894f0 [SPARK-53434][SQL][4.0] ColumnarRow's get should also check isNullAt ed902cf894f0 is described below commit ed902cf894f0f8893d4ecff0e2f07f81a12829d2 Author: wangguangxin.cn <wangguangxin...@bytedance.com> AuthorDate: Thu Sep 11 15:45:59 2025 +0800 [SPARK-53434][SQL][4.0] ColumnarRow's get should also check isNullAt ### What changes were proposed in this pull request? Currently, ColumnarRow's `get` does not check `isNullAt`, but `UnsafeRow.get` does: https://github.com/apache/spark/blob/b177b6515c8371fe0761b46d2fa45dd5e8465910/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGettersReader.java#L36 In some cases it is assumed that `InternalRow.get` is null-safe, for example: https://github.com/apache/spark/blob/5b2c4cf9ce886b69eeb5d2303d7582f6ecd763aa/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala#L377 We hit this when extending Spark to work on columnar data. ### Why are the changes needed? So that `ColumnarRow.get` returns null for a null field, consistent with `UnsafeRow.get` and with callers that assume `InternalRow.get` is null-safe. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually ### Was this patch authored or co-authored using generative AI tooling? No Closes #52301 from WangGuangxin/fix_columnarrow_4.0. 
Authored-by: wangguangxin.cn <wangguangxin...@bytedance.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/vectorized/ColumnarRow.java | 1 + .../execution/vectorized/ColumnVectorSuite.scala | 24 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java index ac05981da5a2..b14cd3429e47 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java @@ -164,6 +164,7 @@ public final class ColumnarRow extends InternalRow { @Override public Object get(int ordinal, DataType dataType) { + if (isNullAt(ordinal)) return null; if (dataType instanceof BooleanType) { return getBoolean(ordinal); } else if (dataType instanceof ByteType) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 0edbfd10d8cd..09f2dbfaefc8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -926,5 +926,27 @@ class ColumnVectorSuite extends SparkFunSuite with SQLHelper { } } } -} + testVectors("SPARK-53434: ColumnarRow.get() should handle null", 1, structType) { testVector => + val c1 = testVector.getChild(0) + val c2 = testVector.getChild(1) + val c3 = testVector.getChild(2) + + // For row 0, set the integer field to null, and other fields to non-null. + c1.putNull(0) + c2.putDouble(0, 3.45) + c3.putLong(0, 1000L) + + val row = testVector.getStruct(0) + + // Verify that get() on the null field returns null. 
+ assert(row.isNullAt(0)) + assert(row.get(0, IntegerType) == null) + + // Verify that other fields can be retrieved correctly. + assert(!row.isNullAt(1)) + assert(row.get(1, DoubleType) === 3.45) + assert(!row.isNullAt(2)) + assert(row.get(2, TimestampNTZType) === 1000L) + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org