This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 3f6d3de049 Spark 4.1: Use table IDs in scan equals/hashCode (#15363)
3f6d3de049 is described below
commit 3f6d3de04961fbf7455e4e8d3ee74ff5922cc122
Author: Anton Okolnychyi <[email protected]>
AuthorDate: Wed Feb 18 22:57:17 2026 -0800
Spark 4.1: Use table IDs in scan equals/hashCode (#15363)
---
.../org/apache/iceberg/spark/source/SparkBatchQueryScan.java | 2 ++
.../java/org/apache/iceberg/spark/source/SparkChangelogScan.java | 4 +++-
.../org/apache/iceberg/spark/source/SparkCopyOnWriteScan.java | 8 +++++++-
.../java/org/apache/iceberg/spark/source/SparkStagedScan.java | 9 ++++++++-
4 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkBatchQueryScan.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkBatchQueryScan.java
index 0ec77d9d06..0db1df3aeb 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkBatchQueryScan.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkBatchQueryScan.java
@@ -253,6 +253,7 @@ class SparkBatchQueryScan extends SparkPartitioningAwareScan<PartitionScanTask>
SparkBatchQueryScan that = (SparkBatchQueryScan) o;
return table().name().equals(that.table().name())
+ && Objects.equals(table().uuid(), that.table().uuid())
&& Objects.equals(branch(), that.branch())
&& readSchema().equals(that.readSchema()) // compare Spark schemas to ignore field ids
&& filtersDesc().equals(that.filtersDesc())
@@ -268,6 +269,7 @@ class SparkBatchQueryScan extends SparkPartitioningAwareScan<PartitionScanTask>
public int hashCode() {
return Objects.hash(
table().name(),
+ table().uuid(),
branch(),
readSchema(),
filtersDesc(),
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkChangelogScan.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkChangelogScan.java
index eb4659f3eb..c6c5edf45c 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkChangelogScan.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkChangelogScan.java
@@ -148,6 +148,7 @@ class SparkChangelogScan implements Scan, SupportsReportStatistics {
SparkChangelogScan that = (SparkChangelogScan) o;
return table.name().equals(that.table.name())
+ && Objects.equals(table.uuid(), that.table.uuid())
&& readSchema().equals(that.readSchema()) // compare Spark schemas to ignore field IDs
&& filtersDesc().equals(that.filtersDesc())
&& Objects.equals(startSnapshotId, that.startSnapshotId)
@@ -156,7 +157,8 @@ class SparkChangelogScan implements Scan, SupportsReportStatistics {
@Override
public int hashCode() {
- return Objects.hash(table.name(), readSchema(), filtersDesc(), startSnapshotId, endSnapshotId);
+ return Objects.hash(
+ table.name(), table.uuid(), readSchema(), filtersDesc(), startSnapshotId, endSnapshotId);
}
private String filtersDesc() {
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkCopyOnWriteScan.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkCopyOnWriteScan.java
index 38664ce8bb..a0b531b3a5 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkCopyOnWriteScan.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkCopyOnWriteScan.java
@@ -161,6 +161,7 @@ class SparkCopyOnWriteScan extends SparkPartitioningAwareScan<FileScanTask>
SparkCopyOnWriteScan that = (SparkCopyOnWriteScan) o;
return table().name().equals(that.table().name())
+ && Objects.equals(table().uuid(), that.table().uuid())
&& readSchema().equals(that.readSchema()) // compare Spark schemas to ignore field ids
&& filtersDesc().equals(that.filtersDesc())
&& Objects.equals(snapshotId(), that.snapshotId())
@@ -170,7 +171,12 @@ class SparkCopyOnWriteScan extends SparkPartitioningAwareScan<FileScanTask>
@Override
public int hashCode() {
return Objects.hash(
- table().name(), readSchema(), filtersDesc(), snapshotId(), filteredLocations);
+ table().name(),
+ table().uuid(),
+ readSchema(),
+ filtersDesc(),
+ snapshotId(),
+ filteredLocations);
}
@Override
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkStagedScan.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkStagedScan.java
index 435c2cbd15..db12ccb23f 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkStagedScan.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/source/SparkStagedScan.java
@@ -91,6 +91,7 @@ class SparkStagedScan extends SparkScan {
SparkStagedScan that = (SparkStagedScan) other;
return table().name().equals(that.table().name())
+ && Objects.equals(table().uuid(), that.table().uuid())
&& Objects.equals(taskSetId, that.taskSetId)
&& readSchema().equals(that.readSchema())
&& splitSize == that.splitSize
@@ -101,7 +102,13 @@ class SparkStagedScan extends SparkScan {
@Override
public int hashCode() {
return Objects.hash(
- table().name(), taskSetId, readSchema(), splitSize, splitLookback, openFileCost);
+ table().name(),
+ table().uuid(),
+ taskSetId,
+ readSchema(),
+ splitSize,
+ splitLookback,
+ openFileCost);
}
@Override