This is an automated email from the ASF dual-hosted git repository.
singhpk234 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 571b696106 REST: Fix serde of tasks with multiple deletes (#14573)
571b696106 is described below
commit 571b696106f4748d3edee9ee641e214377ddcb16
Author: Prashant Singh <[email protected]>
AuthorDate: Thu Nov 13 02:43:54 2025 -0800
REST: Fix serde of tasks with multiple deletes (#14573)
Co-authored-by: Prashant Kumar Singh <[email protected]>
---
.../iceberg/rest/TableScanResponseParser.java | 6 +-
.../src/test/java/org/apache/iceberg/TestBase.java | 8 +-
.../responses/TestPlanTableScanResponseParser.java | 131 +++++++++++++++++++++
3 files changed, 138 insertions(+), 7 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java
index 67f71c4184..084e74651a 100644
--- a/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java
+++ b/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java
@@ -96,7 +96,7 @@ public class TableScanResponseParser {
gen.writeArrayFieldStart(DELETE_FILES);
for (int i = 0; i < deleteFiles.size(); i++) {
DeleteFile deleteFile = deleteFiles.get(i);
- deleteFilePathToIndex.put(String.valueOf(deleteFile.path()), i);
+ deleteFilePathToIndex.put(deleteFile.location(), i);
ContentFileParser.toJson(deleteFiles.get(i), specsById.get(deleteFile.specId()), gen);
}
@@ -105,11 +105,11 @@ public class TableScanResponseParser {
if (fileScanTasks != null) {
gen.writeArrayFieldStart(FILE_SCAN_TASKS);
- Set<Integer> deleteFileReferences = Sets.newHashSet();
for (FileScanTask fileScanTask : fileScanTasks) {
+ Set<Integer> deleteFileReferences = Sets.newHashSet();
if (deleteFiles != null) {
for (DeleteFile taskDelete : fileScanTask.deletes()) {
- deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.path().toString()));
+ deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.location()));
}
}
diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java
index 2b91e21262..0929c1bd37 100644
--- a/core/src/test/java/org/apache/iceberg/TestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TestBase.java
@@ -111,7 +111,7 @@ public class TestBase {
.withPartitionPath("data_bucket=0")
.withRecordCount(1)
.build();
- static final DataFile FILE_B =
+ public static final DataFile FILE_B =
DataFiles.builder(SPEC)
.withPath("/path/to/data-b.parquet")
.withFileSizeInBytes(10)
@@ -119,7 +119,7 @@ public class TestBase {
.withRecordCount(1)
.withSplitOffsets(ImmutableList.of(1L))
.build();
- static final DeleteFile FILE_B_DELETES =
+ public static final DeleteFile FILE_B_DELETES =
FileMetadata.deleteFileBuilder(SPEC)
.ofPositionDeletes()
.withPath("/path/to/data-b-deletes.parquet")
@@ -138,7 +138,7 @@ public class TestBase {
.withContentOffset(4)
.withContentSizeInBytes(6)
.build();
- static final DataFile FILE_C =
+ public static final DataFile FILE_C =
DataFiles.builder(SPEC)
.withPath("/path/to/data-c.parquet")
.withFileSizeInBytes(10)
@@ -146,7 +146,7 @@ public class TestBase {
.withRecordCount(1)
.withSplitOffsets(ImmutableList.of(2L, 8L))
.build();
- static final DeleteFile FILE_C2_DELETES =
+ public static final DeleteFile FILE_C2_DELETES =
FileMetadata.deleteFileBuilder(SPEC)
.ofEqualityDeletes(1)
.withPath("/path/to/data-c-deletes.parquet")
diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java
index 5ddedcacae..3a8563ce58 100644
--- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java
+++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java
@@ -20,6 +20,10 @@ package org.apache.iceberg.rest.responses;
import static org.apache.iceberg.TestBase.FILE_A;
import static org.apache.iceberg.TestBase.FILE_A_DELETES;
+import static org.apache.iceberg.TestBase.FILE_B;
+import static org.apache.iceberg.TestBase.FILE_B_DELETES;
+import static org.apache.iceberg.TestBase.FILE_C;
+import static org.apache.iceberg.TestBase.FILE_C2_DELETES;
import static org.apache.iceberg.TestBase.PARTITION_SPECS_BY_ID;
import static org.apache.iceberg.TestBase.SCHEMA;
import static org.apache.iceberg.TestBase.SPEC;
@@ -265,6 +269,133 @@ public class TestPlanTableScanResponseParser {
assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedToJson);
}
+ @Test
+ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() {
+ ResidualEvaluator residualEvaluator =
+ ResidualEvaluator.of(SPEC, Expressions.alwaysTrue(), true);
+
+ // Create three tasks, each with its own distinct delete file
+ FileScanTask taskA =
+ new BaseFileScanTask(
+ FILE_A,
+ new DeleteFile[] {FILE_A_DELETES},
+ SchemaParser.toJson(SCHEMA),
+ PartitionSpecParser.toJson(SPEC),
+ residualEvaluator);
+
+ FileScanTask taskB =
+ new BaseFileScanTask(
+ FILE_B,
+ new DeleteFile[] {FILE_B_DELETES},
+ SchemaParser.toJson(SCHEMA),
+ PartitionSpecParser.toJson(SPEC),
+ residualEvaluator);
+
+ FileScanTask taskC =
+ new BaseFileScanTask(
+ FILE_C,
+ new DeleteFile[] {FILE_C2_DELETES},
+ SchemaParser.toJson(SCHEMA),
+ PartitionSpecParser.toJson(SPEC),
+ residualEvaluator);
+
+ PlanTableScanResponse response =
+ PlanTableScanResponse.builder()
+ .withPlanStatus(PlanStatus.COMPLETED)
+ .withFileScanTasks(List.of(taskA, taskB, taskC))
+ .withDeleteFiles(List.of(FILE_A_DELETES, FILE_B_DELETES, FILE_C2_DELETES))
+ .withSpecsById(PARTITION_SPECS_BY_ID)
+ .build();
+
+ String expectedJson =
+ "{\n"
+ + " \"plan-status\" : \"completed\",\n"
+ + " \"delete-files\" : [ {\n"
+ + " \"spec-id\" : 0,\n"
+ + " \"content\" : \"POSITION_DELETES\",\n"
+ + " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n"
+ + " \"file-format\" : \"PARQUET\",\n"
+ + " \"partition\" : {\n"
+ + " \"1000\" : 0\n"
+ + " },\n"
+ + " \"file-size-in-bytes\" : 10,\n"
+ + " \"record-count\" : 1\n"
+ + " }, {\n"
+ + " \"spec-id\" : 0,\n"
+ + " \"content\" : \"POSITION_DELETES\",\n"
+ + " \"file-path\" : \"/path/to/data-b-deletes.parquet\",\n"
+ + " \"file-format\" : \"PARQUET\",\n"
+ + " \"partition\" : {\n"
+ + " \"1000\" : 1\n"
+ + " },\n"
+ + " \"file-size-in-bytes\" : 10,\n"
+ + " \"record-count\" : 1\n"
+ + " }, {\n"
+ + " \"spec-id\" : 0,\n"
+ + " \"content\" : \"EQUALITY_DELETES\",\n"
+ + " \"file-path\" : \"/path/to/data-c-deletes.parquet\",\n"
+ + " \"file-format\" : \"PARQUET\",\n"
+ + " \"partition\" : {\n"
+ + " \"1000\" : 2\n"
+ + " },\n"
+ + " \"file-size-in-bytes\" : 10,\n"
+ + " \"record-count\" : 1,\n"
+ + " \"equality-ids\" : [ 1 ],\n"
+ + " \"sort-order-id\" : 0\n"
+ + " } ],\n"
+ + " \"file-scan-tasks\" : [ {\n"
+ + " \"data-file\" : {\n"
+ + " \"spec-id\" : 0,\n"
+ + " \"content\" : \"DATA\",\n"
+ + " \"file-path\" : \"/path/to/data-a.parquet\",\n"
+ + " \"file-format\" : \"PARQUET\",\n"
+ + " \"partition\" : {\n"
+ + " \"1000\" : 0\n"
+ + " },\n"
+ + " \"file-size-in-bytes\" : 10,\n"
+ + " \"record-count\" : 1,\n"
+ + " \"sort-order-id\" : 0\n"
+ + " },\n"
+ + " \"delete-file-references\" : [ 0 ],\n"
+ + " \"residual-filter\" : true\n"
+ + " }, {\n"
+ + " \"data-file\" : {\n"
+ + " \"spec-id\" : 0,\n"
+ + " \"content\" : \"DATA\",\n"
+ + " \"file-path\" : \"/path/to/data-b.parquet\",\n"
+ + " \"file-format\" : \"PARQUET\",\n"
+ + " \"partition\" : {\n"
+ + " \"1000\" : 1\n"
+ + " },\n"
+ + " \"file-size-in-bytes\" : 10,\n"
+ + " \"record-count\" : 1,\n"
+ + " \"split-offsets\" : [ 1 ],\n"
+ + " \"sort-order-id\" : 0\n"
+ + " },\n"
+ + " \"delete-file-references\" : [ 1 ],\n"
+ + " \"residual-filter\" : true\n"
+ + " }, {\n"
+ + " \"data-file\" : {\n"
+ + " \"spec-id\" : 0,\n"
+ + " \"content\" : \"DATA\",\n"
+ + " \"file-path\" : \"/path/to/data-c.parquet\",\n"
+ + " \"file-format\" : \"PARQUET\",\n"
+ + " \"partition\" : {\n"
+ + " \"1000\" : 2\n"
+ + " },\n"
+ + " \"file-size-in-bytes\" : 10,\n"
+ + " \"record-count\" : 1,\n"
+ + " \"split-offsets\" : [ 2, 8 ],\n"
+ + " \"sort-order-id\" : 0\n"
+ + " },\n"
+ + " \"delete-file-references\" : [ 2 ],\n"
+ + " \"residual-filter\" : true\n"
+ + " } ]\n"
+ + "}";
+ String json = PlanTableScanResponseParser.toJson(response, true);
+ assertThat(json).isEqualTo(expectedJson);
+ }
+
@Test
public void roundTripSerdeWithoutDeleteFiles() {
ResidualEvaluator residualEvaluator =