This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new b7181e8f76 Fix incorrect partition bounds calculation in manifest on deletion (#15127)
b7181e8f76 is described below
commit b7181e8f765fc3ff188c325ba422b347f59d271e
Author: Dong Wang <[email protected]>
AuthorDate: Thu Jan 29 05:23:11 2026 +0800
Fix incorrect partition bounds calculation in manifest on deletion (#15127)
When deleting data files through `DeleteFiles.deleteFromRowFilter`,
the manifest file was rewritten with incorrect partition boundary
values if the target table is partitioned by a column of `binary` type.
The root cause is in `PartitionSummary.updateFields`: the min/max
fields of `PartitionFieldStats` were updated to reference a byte array
directly, and that array can be reused by `ManifestReader` when reading
multiple files, so subsequent reads could overwrite the recorded bounds
in place.
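For illustration only (not part of this patch): a minimal, self-contained
Java sketch of the aliasing problem and the defensive-copy remedy. The
class and variable names below are hypothetical; only the
ByteBuffer.wrap / Arrays.copyOf pattern mirrors the change in this commit.

    import java.nio.ByteBuffer;
    import java.util.Arrays;

    public class ReusedBufferCopyDemo {
      public static void main(String[] args) {
        // Simulates a reader that recycles one backing byte[] across records.
        byte[] reused = new byte[] {(byte) 0xbc, (byte) 0xd1};

        // Aliasing: wrap the live buffer directly (the previous behavior, in spirit).
        ByteBuffer aliased = ByteBuffer.wrap(reused);

        // Defensive copy: copy the bytes before wrapping (the fix, in spirit).
        ByteBuffer copied = ByteBuffer.wrap(Arrays.copyOf(reused, reused.length));

        // The reader overwrites the buffer in place while reading the next record.
        reused[0] = (byte) 0xe3;

        // The aliased bound now reflects the next record's bytes; the copy is stable.
        System.out.printf("aliased first byte: %02x%n", aliased.get(0)); // prints e3
        System.out.printf("copied  first byte: %02x%n", copied.get(0));  // prints bc
      }
    }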
---
.../java/org/apache/iceberg/PartitionData.java | 3 ++-
.../apache/iceberg/spark/sql/TestDeleteFrom.java | 23 ++++++++++++++++++++++
.../apache/iceberg/spark/sql/TestDeleteFrom.java | 23 ++++++++++++++++++++++
.../apache/iceberg/spark/sql/TestDeleteFrom.java | 23 ++++++++++++++++++++++
4 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/PartitionData.java b/core/src/main/java/org/apache/iceberg/PartitionData.java
index 41bc4c0c12..41ad72bf0b 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionData.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionData.java
@@ -135,7 +135,8 @@ public class PartitionData
}
if (data[pos] instanceof byte[]) {
- return ByteBuffer.wrap((byte[]) data[pos]);
+ byte[] copied = Arrays.copyOf((byte[]) data[pos], ((byte[]) data[pos]).length);
+ return ByteBuffer.wrap(copied);
}
return data[pos];
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
index bd4a41593c..2389bcc173 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
@@ -162,4 +162,27 @@ public class TestDeleteFrom extends CatalogTestBase {
ImmutableList.of(),
sql("SELECT * FROM %s ORDER BY id", tableName));
}
+
+ @TestTemplate
+ public void testDeleteFromTablePartitionedByVarbinary() {
+ sql(
+ "CREATE TABLE %s (id bigint NOT NULL, data binary) USING iceberg
PARTITIONED BY (data)",
+ tableName);
+ sql("INSERT INTO TABLE %s VALUES(1, X'e3bcd1'), (2, X'bcd1')", tableName);
+
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47}), row(2L, new byte[] {-68, -47})),
+ sql("SELECT * FROM %s ORDER BY id", tableName));
+
+ sql("DELETE FROM %s WHERE data = X'bcd1'", tableName);
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47})),
+ sql("SELECT * FROM %s", tableName));
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47})),
+ sql("SELECT * FROM %s where data = X'e3bcd1'", tableName));
+ }
}
diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
index bd4a41593c..2389bcc173 100644
--- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
+++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
@@ -162,4 +162,27 @@ public class TestDeleteFrom extends CatalogTestBase {
ImmutableList.of(),
sql("SELECT * FROM %s ORDER BY id", tableName));
}
+
+ @TestTemplate
+ public void testDeleteFromTablePartitionedByVarbinary() {
+ sql(
+ "CREATE TABLE %s (id bigint NOT NULL, data binary) USING iceberg
PARTITIONED BY (data)",
+ tableName);
+ sql("INSERT INTO TABLE %s VALUES(1, X'e3bcd1'), (2, X'bcd1')", tableName);
+
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47}), row(2L, new byte[] {-68, -47})),
+ sql("SELECT * FROM %s ORDER BY id", tableName));
+
+ sql("DELETE FROM %s WHERE data = X'bcd1'", tableName);
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47})),
+ sql("SELECT * FROM %s", tableName));
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47})),
+ sql("SELECT * FROM %s where data = X'e3bcd1'", tableName));
+ }
}
diff --git a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
index bd4a41593c..2389bcc173 100644
--- a/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
+++ b/spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/sql/TestDeleteFrom.java
@@ -162,4 +162,27 @@ public class TestDeleteFrom extends CatalogTestBase {
ImmutableList.of(),
sql("SELECT * FROM %s ORDER BY id", tableName));
}
+
+ @TestTemplate
+ public void testDeleteFromTablePartitionedByVarbinary() {
+ sql(
+ "CREATE TABLE %s (id bigint NOT NULL, data binary) USING iceberg
PARTITIONED BY (data)",
+ tableName);
+ sql("INSERT INTO TABLE %s VALUES(1, X'e3bcd1'), (2, X'bcd1')", tableName);
+
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47}), row(2L, new byte[] {-68, -47})),
+ sql("SELECT * FROM %s ORDER BY id", tableName));
+
+ sql("DELETE FROM %s WHERE data = X'bcd1'", tableName);
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47})),
+ sql("SELECT * FROM %s", tableName));
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(row(1L, new byte[] {-29, -68, -47})),
+ sql("SELECT * FROM %s where data = X'e3bcd1'", tableName));
+ }
}