This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new fca74a08c6 Spark: Backport Add _row_id and
_last_updated_sequence_number reader in Orc to support lineage (#16534)
fca74a08c6 is described below
commit fca74a08c6e0a08646647dcbda9a63fe90ee88d5
Author: GuoYu <[email protected]>
AuthorDate: Sat May 23 02:43:23 2026 +0800
Spark: Backport Add _row_id and _last_updated_sequence_number reader in Orc
to support lineage (#16534)
---
.../extensions/TestRowLevelOperationsWithLineage.java | 12 ++++++++++++
.../org/apache/iceberg/spark/data/SparkOrcReader.java | 2 +-
.../apache/iceberg/spark/data/SparkOrcValueReaders.java | 15 +++++++++++----
.../extensions/TestRowLevelOperationsWithLineage.java | 12 ++++++++++++
.../org/apache/iceberg/spark/data/SparkOrcReader.java | 2 +-
.../apache/iceberg/spark/data/SparkOrcValueReaders.java | 15 +++++++++++----
6 files changed, 48 insertions(+), 10 deletions(-)
diff --git
a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
index c1a9a26f84..63458c64a5 100644
---
a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
+++
b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
@@ -95,6 +95,18 @@ public abstract class TestRowLevelOperationsWithLineage
extends SparkRowLevelOpe
+ " fanout = {6}, branch = {7}, planningMode = {8},
formatVersion = {9}")
public static Object[][] parameters() {
return new Object[][] {
+ {
+ "testhadoop",
+ SparkCatalog.class.getName(),
+ ImmutableMap.of("type", "hadoop"),
+ FileFormat.ORC,
+ false,
+ WRITE_DISTRIBUTION_MODE_HASH,
+ true,
+ null,
+ LOCAL,
+ 3
+ },
{
"testhadoop",
SparkCatalog.class.getName(),
diff --git
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
index c20be44f67..c0d3d3efe0 100644
---
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
+++
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
@@ -77,7 +77,7 @@ public class SparkOrcReader implements
OrcRowReader<InternalRow> {
TypeDescription record,
List<String> names,
List<OrcValueReader<?>> fields) {
- return SparkOrcValueReaders.struct(fields, expected, idToConstant);
+ return SparkOrcValueReaders.struct(record, fields, expected,
idToConstant);
}
@Override
diff --git
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
index 670537fbf8..67664ac6c7 100644
---
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
+++
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
@@ -28,6 +28,7 @@ import
org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.UUIDUtil;
+import org.apache.orc.TypeDescription;
import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
import org.apache.orc.storage.ql.exec.vector.ColumnVector;
import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
@@ -70,8 +71,11 @@ public class SparkOrcValueReaders {
}
static OrcValueReader<?> struct(
- List<OrcValueReader<?>> readers, Types.StructType struct, Map<Integer,
?> idToConstant) {
- return new StructReader(readers, struct, idToConstant);
+ TypeDescription record,
+ List<OrcValueReader<?>> readers,
+ Types.StructType struct,
+ Map<Integer, ?> idToConstant) {
+ return new StructReader(record, readers, struct, idToConstant);
}
static OrcValueReader<?> array(OrcValueReader<?> elementReader) {
@@ -143,8 +147,11 @@ public class SparkOrcValueReaders {
private final int numFields;
protected StructReader(
- List<OrcValueReader<?>> readers, Types.StructType struct, Map<Integer,
?> idToConstant) {
- super(readers, struct, idToConstant);
+ TypeDescription record,
+ List<OrcValueReader<?>> readers,
+ Types.StructType struct,
+ Map<Integer, ?> idToConstant) {
+ super(record, readers, struct, idToConstant);
this.numFields = struct.fields().size();
}
diff --git
a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
index 7730368523..f38178a8e8 100644
---
a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
+++
b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRowLevelOperationsWithLineage.java
@@ -95,6 +95,18 @@ public abstract class TestRowLevelOperationsWithLineage
extends SparkRowLevelOpe
+ " fanout = {6}, branch = {7}, planningMode = {8},
formatVersion = {9}")
public static Object[][] parameters() {
return new Object[][] {
+ {
+ "testhadoop",
+ SparkCatalog.class.getName(),
+ ImmutableMap.of("type", "hadoop"),
+ FileFormat.ORC,
+ false,
+ WRITE_DISTRIBUTION_MODE_HASH,
+ true,
+ null,
+ LOCAL,
+ 3
+ },
{
"testhadoop",
SparkCatalog.class.getName(),
diff --git
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
index c20be44f67..c0d3d3efe0 100644
---
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
+++
b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcReader.java
@@ -77,7 +77,7 @@ public class SparkOrcReader implements
OrcRowReader<InternalRow> {
TypeDescription record,
List<String> names,
List<OrcValueReader<?>> fields) {
- return SparkOrcValueReaders.struct(fields, expected, idToConstant);
+ return SparkOrcValueReaders.struct(record, fields, expected,
idToConstant);
}
@Override
diff --git
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
index 670537fbf8..67664ac6c7 100644
---
a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
+++
b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
@@ -28,6 +28,7 @@ import
org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.UUIDUtil;
+import org.apache.orc.TypeDescription;
import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
import org.apache.orc.storage.ql.exec.vector.ColumnVector;
import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
@@ -70,8 +71,11 @@ public class SparkOrcValueReaders {
}
static OrcValueReader<?> struct(
- List<OrcValueReader<?>> readers, Types.StructType struct, Map<Integer,
?> idToConstant) {
- return new StructReader(readers, struct, idToConstant);
+ TypeDescription record,
+ List<OrcValueReader<?>> readers,
+ Types.StructType struct,
+ Map<Integer, ?> idToConstant) {
+ return new StructReader(record, readers, struct, idToConstant);
}
static OrcValueReader<?> array(OrcValueReader<?> elementReader) {
@@ -143,8 +147,11 @@ public class SparkOrcValueReaders {
private final int numFields;
protected StructReader(
- List<OrcValueReader<?>> readers, Types.StructType struct, Map<Integer,
?> idToConstant) {
- super(readers, struct, idToConstant);
+ TypeDescription record,
+ List<OrcValueReader<?>> readers,
+ Types.StructType struct,
+ Map<Integer, ?> idToConstant) {
+ super(record, readers, struct, idToConstant);
this.numFields = struct.fields().size();
}