This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 90b01fc [SPARK-38628][SQL] Complete the copy method in subclasses of InternalRow, ArrayData, and MapData to safely copy their instances 90b01fc is described below commit 90b01fc12b2eb24d5d864fc89883889e36a194ab Author: Takuya UESHIN <ues...@databricks.com> AuthorDate: Wed Mar 23 19:43:44 2022 +0900 [SPARK-38628][SQL] Complete the copy method in subclasses of InternalRow, ArrayData, and MapData to safely copy their instances ### What changes were proposed in this pull request? Completes the `copy` method in subclasses of `InternalRow`, `ArrayData`, and `MapData` to safely copy their instances. ### Why are the changes needed? Some subclasses of `InternalRow`, `ArrayData`, and `MapData` are missing support for `StructType`, `ArrayType`, and `MapType` in their copy method. We should complete them to safely copy their instances and prevent potential issues. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35942 from ueshin/issues/SPARK-38628/copy. 
Authored-by: Takuya UESHIN <ues...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit 861e8b4a8ba784da1a69bd6522a0a7fdac5d1091) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../main/java/org/apache/spark/sql/vectorized/ColumnarArray.java | 2 +- .../main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java | 6 ++++++ .../src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java | 6 ++++++ .../apache/spark/sql/execution/vectorized/MutableColumnarRow.java | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java index 2fb6b3f..bd7c3d7 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java @@ -68,7 +68,7 @@ public final class ColumnarArray extends ArrayData { } else if (dt instanceof DoubleType) { return UnsafeArrayData.fromPrimitiveArray(toDoubleArray()); } else { - return new GenericArrayData(toObjectArray(dt)); + return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied. 
} } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java index 8c32d5c..7f84126 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java @@ -71,6 +71,12 @@ public final class ColumnarBatchRow extends InternalRow { row.setInt(i, getInt(i)); } else if (dt instanceof TimestampType) { row.setLong(i, getLong(i)); + } else if (dt instanceof StructType) { + row.update(i, getStruct(i, ((StructType) dt).fields().length).copy()); + } else if (dt instanceof ArrayType) { + row.update(i, getArray(i).copy()); + } else if (dt instanceof MapType) { + row.update(i, getMap(i).copy()); } else { throw new RuntimeException("Not implemented. " + dt); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java index da4b242..fd4e8ff 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java @@ -80,6 +80,12 @@ public final class ColumnarRow extends InternalRow { row.setInt(i, getInt(i)); } else if (dt instanceof TimestampType) { row.setLong(i, getLong(i)); + } else if (dt instanceof StructType) { + row.update(i, getStruct(i, ((StructType) dt).fields().length).copy()); + } else if (dt instanceof ArrayType) { + row.update(i, getArray(i).copy()); + } else if (dt instanceof MapType) { + row.update(i, getMap(i).copy()); } else { throw new RuntimeException("Not implemented. 
" + dt); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java index f4fdf50..64568f1 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java @@ -80,6 +80,12 @@ public final class MutableColumnarRow extends InternalRow { row.setInt(i, getInt(i)); } else if (dt instanceof TimestampType) { row.setLong(i, getLong(i)); + } else if (dt instanceof StructType) { + row.update(i, getStruct(i, ((StructType) dt).fields().length).copy()); + } else if (dt instanceof ArrayType) { + row.update(i, getArray(i).copy()); + } else if (dt instanceof MapType) { + row.update(i, getMap(i).copy()); } else { throw new RuntimeException("Not implemented. " + dt); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org