This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch 1.10.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git

commit 7fc93ec7e8b8e38356f2ffd13e0ca87b561a5f46 Author: Prashant Singh <[email protected]> AuthorDate: Thu Nov 13 02:43:54 2025 -0800 REST: Fix serde of tasks with multiple deletes (#14573) Co-authored-by: Prashant Kumar Singh <[email protected]> --- .../iceberg/rest/TableScanResponseParser.java | 6 +- .../src/test/java/org/apache/iceberg/TestBase.java | 8 +- .../responses/TestPlanTableScanResponseParser.java | 131 +++++++++++++++++++++ 3 files changed, 138 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java index 67f71c4184..084e74651a 100644 --- a/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java @@ -96,7 +96,7 @@ public class TableScanResponseParser { gen.writeArrayFieldStart(DELETE_FILES); for (int i = 0; i < deleteFiles.size(); i++) { DeleteFile deleteFile = deleteFiles.get(i); - deleteFilePathToIndex.put(String.valueOf(deleteFile.path()), i); + deleteFilePathToIndex.put(deleteFile.location(), i); ContentFileParser.toJson(deleteFiles.get(i), specsById.get(deleteFile.specId()), gen); } @@ -105,11 +105,11 @@ public class TableScanResponseParser { if (fileScanTasks != null) { gen.writeArrayFieldStart(FILE_SCAN_TASKS); - Set<Integer> deleteFileReferences = Sets.newHashSet(); for (FileScanTask fileScanTask : fileScanTasks) { + Set<Integer> deleteFileReferences = Sets.newHashSet(); if (deleteFiles != null) { for (DeleteFile taskDelete : fileScanTask.deletes()) { - deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.path().toString())); + deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.location())); } } diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java index 30c1fb7191..caca0022a7 100644 --- 
a/core/src/test/java/org/apache/iceberg/TestBase.java +++ b/core/src/test/java/org/apache/iceberg/TestBase.java @@ -110,7 +110,7 @@ public class TestBase { .withPartitionPath("data_bucket=0") .withRecordCount(1) .build(); - static final DataFile FILE_B = + public static final DataFile FILE_B = DataFiles.builder(SPEC) .withPath("/path/to/data-b.parquet") .withFileSizeInBytes(10) @@ -118,7 +118,7 @@ public class TestBase { .withRecordCount(1) .withSplitOffsets(ImmutableList.of(1L)) .build(); - static final DeleteFile FILE_B_DELETES = + public static final DeleteFile FILE_B_DELETES = FileMetadata.deleteFileBuilder(SPEC) .ofPositionDeletes() .withPath("/path/to/data-b-deletes.parquet") @@ -137,7 +137,7 @@ public class TestBase { .withContentOffset(4) .withContentSizeInBytes(6) .build(); - static final DataFile FILE_C = + public static final DataFile FILE_C = DataFiles.builder(SPEC) .withPath("/path/to/data-c.parquet") .withFileSizeInBytes(10) @@ -145,7 +145,7 @@ public class TestBase { .withRecordCount(1) .withSplitOffsets(ImmutableList.of(2L, 8L)) .build(); - static final DeleteFile FILE_C2_DELETES = + public static final DeleteFile FILE_C2_DELETES = FileMetadata.deleteFileBuilder(SPEC) .ofEqualityDeletes(1) .withPath("/path/to/data-c-deletes.parquet") diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index 5ddedcacae..3a8563ce58 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -20,6 +20,10 @@ package org.apache.iceberg.rest.responses; import static org.apache.iceberg.TestBase.FILE_A; import static org.apache.iceberg.TestBase.FILE_A_DELETES; +import static org.apache.iceberg.TestBase.FILE_B; +import static org.apache.iceberg.TestBase.FILE_B_DELETES; +import static 
org.apache.iceberg.TestBase.FILE_C; +import static org.apache.iceberg.TestBase.FILE_C2_DELETES; import static org.apache.iceberg.TestBase.PARTITION_SPECS_BY_ID; import static org.apache.iceberg.TestBase.SCHEMA; import static org.apache.iceberg.TestBase.SPEC; @@ -265,6 +269,133 @@ public class TestPlanTableScanResponseParser { assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedToJson); } + @Test + public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + ResidualEvaluator residualEvaluator = + ResidualEvaluator.of(SPEC, Expressions.alwaysTrue(), true); + + // Create three tasks, each with its own distinct delete file + FileScanTask taskA = + new BaseFileScanTask( + FILE_A, + new DeleteFile[] {FILE_A_DELETES}, + SchemaParser.toJson(SCHEMA), + PartitionSpecParser.toJson(SPEC), + residualEvaluator); + + FileScanTask taskB = + new BaseFileScanTask( + FILE_B, + new DeleteFile[] {FILE_B_DELETES}, + SchemaParser.toJson(SCHEMA), + PartitionSpecParser.toJson(SPEC), + residualEvaluator); + + FileScanTask taskC = + new BaseFileScanTask( + FILE_C, + new DeleteFile[] {FILE_C2_DELETES}, + SchemaParser.toJson(SCHEMA), + PartitionSpecParser.toJson(SPEC), + residualEvaluator); + + PlanTableScanResponse response = + PlanTableScanResponse.builder() + .withPlanStatus(PlanStatus.COMPLETED) + .withFileScanTasks(List.of(taskA, taskB, taskC)) + .withDeleteFiles(List.of(FILE_A_DELETES, FILE_B_DELETES, FILE_C2_DELETES)) + .withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + String expectedJson = + "{\n" + + " \"plan-status\" : \"completed\",\n" + + " \"delete-files\" : [ {\n" + + " \"spec-id\" : 0,\n" + + " \"content\" : \"POSITION_DELETES\",\n" + + " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n" + + " \"file-format\" : \"PARQUET\",\n" + + " \"partition\" : {\n" + + " \"1000\" : 0\n" + + " },\n" + + " \"file-size-in-bytes\" : 10,\n" + + " \"record-count\" : 1\n" + + " }, {\n" + + " \"spec-id\" : 0,\n" + + " \"content\" : 
\"POSITION_DELETES\",\n" + + " \"file-path\" : \"/path/to/data-b-deletes.parquet\",\n" + + " \"file-format\" : \"PARQUET\",\n" + + " \"partition\" : {\n" + + " \"1000\" : 1\n" + + " },\n" + + " \"file-size-in-bytes\" : 10,\n" + + " \"record-count\" : 1\n" + + " }, {\n" + + " \"spec-id\" : 0,\n" + + " \"content\" : \"EQUALITY_DELETES\",\n" + + " \"file-path\" : \"/path/to/data-c-deletes.parquet\",\n" + + " \"file-format\" : \"PARQUET\",\n" + + " \"partition\" : {\n" + + " \"1000\" : 2\n" + + " },\n" + + " \"file-size-in-bytes\" : 10,\n" + + " \"record-count\" : 1,\n" + + " \"equality-ids\" : [ 1 ],\n" + + " \"sort-order-id\" : 0\n" + + " } ],\n" + + " \"file-scan-tasks\" : [ {\n" + + " \"data-file\" : {\n" + + " \"spec-id\" : 0,\n" + + " \"content\" : \"DATA\",\n" + + " \"file-path\" : \"/path/to/data-a.parquet\",\n" + + " \"file-format\" : \"PARQUET\",\n" + + " \"partition\" : {\n" + + " \"1000\" : 0\n" + + " },\n" + + " \"file-size-in-bytes\" : 10,\n" + + " \"record-count\" : 1,\n" + + " \"sort-order-id\" : 0\n" + + " },\n" + + " \"delete-file-references\" : [ 0 ],\n" + + " \"residual-filter\" : true\n" + + " }, {\n" + + " \"data-file\" : {\n" + + " \"spec-id\" : 0,\n" + + " \"content\" : \"DATA\",\n" + + " \"file-path\" : \"/path/to/data-b.parquet\",\n" + + " \"file-format\" : \"PARQUET\",\n" + + " \"partition\" : {\n" + + " \"1000\" : 1\n" + + " },\n" + + " \"file-size-in-bytes\" : 10,\n" + + " \"record-count\" : 1,\n" + + " \"split-offsets\" : [ 1 ],\n" + + " \"sort-order-id\" : 0\n" + + " },\n" + + " \"delete-file-references\" : [ 1 ],\n" + + " \"residual-filter\" : true\n" + + " }, {\n" + + " \"data-file\" : {\n" + + " \"spec-id\" : 0,\n" + + " \"content\" : \"DATA\",\n" + + " \"file-path\" : \"/path/to/data-c.parquet\",\n" + + " \"file-format\" : \"PARQUET\",\n" + + " \"partition\" : {\n" + + " \"1000\" : 2\n" + + " },\n" + + " \"file-size-in-bytes\" : 10,\n" + + " \"record-count\" : 1,\n" + + " \"split-offsets\" : [ 2, 8 ],\n" + + " \"sort-order-id\" : 0\n" 
+ + " },\n" + + " \"delete-file-references\" : [ 2 ],\n" + + " \"residual-filter\" : true\n" + + " } ]\n" + + "}"; + String json = PlanTableScanResponseParser.toJson(response, true); + assertThat(json).isEqualTo(expectedJson); + } + @Test public void roundTripSerdeWithoutDeleteFiles() { ResidualEvaluator residualEvaluator =
