This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch 1.10.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/1.10.x by this push:
new ce4e4e0072 API: required nested fields within optional structs can
produce null (#13804) (#14460)
ce4e4e0072 is described below
commit ce4e4e0072cbc1da74cad7ae3f76aeab91e872c7
Author: Huaxin Gao <[email protected]>
AuthorDate: Fri Oct 31 17:21:41 2025 -0700
API: required nested fields within optional structs can produce null
(#13804) (#14460)
(cherry picked from commit c05f6c98d66a5a05ec5dca0b7a1c28036187317d)
Co-authored-by: Dejan Gvozdenac <[email protected]>
---
api/src/main/java/org/apache/iceberg/Accessor.java | 5 +
.../main/java/org/apache/iceberg/Accessors.java | 50 +++++++---
.../apache/iceberg/expressions/BoundReference.java | 4 +-
.../iceberg/expressions/TestBoundReference.java | 108 +++++++++++++++++++++
.../iceberg/parquet/TestBloomRowGroupFilter.java | 8 +-
.../org/apache/iceberg/spark/sql/TestSelect.java | 17 ++++
.../org/apache/iceberg/spark/sql/TestSelect.java | 17 ++++
.../org/apache/iceberg/spark/sql/TestSelect.java | 17 ++++
8 files changed, 209 insertions(+), 17 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java
b/api/src/main/java/org/apache/iceberg/Accessor.java
index 2a20a04df9..20b09bf910 100644
--- a/api/src/main/java/org/apache/iceberg/Accessor.java
+++ b/api/src/main/java/org/apache/iceberg/Accessor.java
@@ -25,4 +25,9 @@ public interface Accessor<T> extends Serializable {
Object get(T container);
Type type();
+
+ /** Returns true if the current field or any ancestor in the access path is
optional. */
+ default boolean hasOptionalFieldInPath() {
+ return false;
+ }
}
diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java
b/api/src/main/java/org/apache/iceberg/Accessors.java
index 0b36730fbb..06ee0a916c 100644
--- a/api/src/main/java/org/apache/iceberg/Accessors.java
+++ b/api/src/main/java/org/apache/iceberg/Accessors.java
@@ -59,11 +59,13 @@ public class Accessors {
private final int position;
private final Type type;
private final Class<?> javaClass;
+ private final boolean hasOptionalFieldInPath;
- PositionAccessor(int pos, Type type) {
+ PositionAccessor(int pos, Type type, boolean isOptional) {
this.position = pos;
this.type = type;
this.javaClass = type.typeId().javaClass();
+ this.hasOptionalFieldInPath = isOptional;
}
@Override
@@ -84,6 +86,11 @@ public class Accessors {
return javaClass;
}
+ @Override
+ public boolean hasOptionalFieldInPath() {
+ return hasOptionalFieldInPath;
+ }
+
@Override
public String toString() {
return "Accessor(positions=[" + position + "], type=" + type + ")";
@@ -95,12 +102,14 @@ public class Accessors {
private final int p1;
private final Type type;
private final Class<?> javaClass;
+ private final boolean hasOptionalFieldInPath;
- Position2Accessor(int pos, PositionAccessor wrapped) {
+ Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.position();
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
+ this.hasOptionalFieldInPath = isOptional ||
wrapped.hasOptionalFieldInPath();
}
@Override
@@ -117,6 +126,11 @@ public class Accessors {
return javaClass;
}
+ @Override
+ public boolean hasOptionalFieldInPath() {
+ return hasOptionalFieldInPath;
+ }
+
@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
@@ -129,13 +143,15 @@ public class Accessors {
private final int p2;
private final Type type;
private final Class<?> javaClass;
+ private final boolean hasOptionalFieldInPath;
- Position3Accessor(int pos, Position2Accessor wrapped) {
+ Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.p0;
this.p2 = wrapped.p1;
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
+ this.hasOptionalFieldInPath = isOptional ||
wrapped.hasOptionalFieldInPath();
}
@Override
@@ -148,6 +164,11 @@ public class Accessors {
return type;
}
+ @Override
+ public boolean hasOptionalFieldInPath() {
+ return hasOptionalFieldInPath;
+ }
+
@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type="
+ type + ")";
@@ -157,10 +178,12 @@ public class Accessors {
private static class WrappedPositionAccessor implements Accessor<StructLike>
{
private final int position;
private final Accessor<StructLike> accessor;
+ private final boolean hasOptionalFieldInPath;
- WrappedPositionAccessor(int pos, Accessor<StructLike> accessor) {
+ WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean
isOptional) {
this.position = pos;
this.accessor = accessor;
+ this.hasOptionalFieldInPath = isOptional ||
accessor.hasOptionalFieldInPath();
}
@Override
@@ -177,27 +200,32 @@ public class Accessors {
return accessor.type();
}
+ @Override
+ public boolean hasOptionalFieldInPath() {
+ return hasOptionalFieldInPath;
+ }
+
@Override
public String toString() {
return "WrappedAccessor(position=" + position + ", wrapped=" + accessor
+ ")";
}
}
- private static Accessor<StructLike> newAccessor(int pos, Type type) {
- return new PositionAccessor(pos, type);
+ private static Accessor<StructLike> newAccessor(int pos, boolean isOptional,
Type type) {
+ return new PositionAccessor(pos, type, isOptional);
}
private static Accessor<StructLike> newAccessor(
int pos, boolean isOptional, Accessor<StructLike> accessor) {
if (isOptional) {
// the wrapped position handles null layers
- return new WrappedPositionAccessor(pos, accessor);
+ return new WrappedPositionAccessor(pos, accessor, isOptional);
} else if (accessor.getClass() == PositionAccessor.class) {
- return new Position2Accessor(pos, (PositionAccessor) accessor);
+ return new Position2Accessor(pos, (PositionAccessor) accessor,
isOptional);
} else if (accessor instanceof Position2Accessor) {
- return new Position3Accessor(pos, (Position2Accessor) accessor);
+ return new Position3Accessor(pos, (Position2Accessor) accessor,
isOptional);
} else {
- return new WrappedPositionAccessor(pos, accessor);
+ return new WrappedPositionAccessor(pos, accessor, isOptional);
}
}
@@ -226,7 +254,7 @@ public class Accessors {
}
// Add an accessor for this field as an Object (may or may not be
primitive).
- accessors.put(field.fieldId(), newAccessor(i, field.type()));
+ accessors.put(field.fieldId(), newAccessor(i, field.isOptional(),
field.type()));
}
return accessors;
diff --git
a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
index 0295fe518f..decda85f2e 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
@@ -57,7 +57,9 @@ public class BoundReference<T> implements BoundTerm<T>,
Reference<T> {
@Override
public boolean producesNull() {
- return field.isOptional();
+ // A leaf required field can evaluate to null if it is optional itself or
any
+ // ancestor on the path is optional.
+ return accessor.hasOptionalFieldInPath();
}
@Override
diff --git
a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
new file mode 100644
index 0000000000..ed921b248f
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.expressions;
+
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+import org.apache.iceberg.Accessor;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+public class TestBoundReference {
+ // Build a schema with a single nested struct with optionalList.size()
levels with the following
+ // structure:
+ // s1: struct(s2: struct(s3: struct(..., sn: struct(leaf: int))))
+ // where each s{i} is an optional struct if optionalList.get(i) is true and
a required struct if
+ // false
+ private static Schema buildSchemaFromOptionalList(List<Boolean>
optionalList, String leafName) {
+ Preconditions.checkArgument(
+ optionalList != null && !optionalList.isEmpty(), "optionalList must
not be null or empty");
+ Types.NestedField leaf =
+ optionalList.get(optionalList.size() - 1)
+ ? optional(optionalList.size(), leafName, Types.IntegerType.get())
+ : required(optionalList.size(), leafName, Types.IntegerType.get());
+
+ Types.StructType current = Types.StructType.of(leaf);
+
+ for (int i = optionalList.size() - 2; i >= 0; i--) {
+ int id = i + 1;
+ String name = "s" + (i + 1);
+ current =
+ Types.StructType.of(
+ optionalList.get(i) ? optional(id, name, current) : required(id,
name, current));
+ }
+
+ return new Schema(current.fields());
+ }
+
+ private static Stream<Arguments> producesNullCases() {
+ // the test cases specify two arguments:
+ // - the first is a list of booleans that indicate whether fields in the
nested sequence of
+ // structs are optional or required. For example, [false, true, false]
will construct a
+ // struct like s1.s2.s3 with s1 being required, s2 being optional, and
s3 being required.
+ // - the second is a boolean that indicates whether calling producesNull()
on the BoundReference
+ // of the leaf field should return true or false.
+ return Stream.of(
+ // basic fields, no struct levels
+ Arguments.of(Arrays.asList(false), false),
+ Arguments.of(Arrays.asList(true), true),
+ // one level
+ Arguments.of(Arrays.asList(false, false), false),
+ Arguments.of(Arrays.asList(false, true), true),
+ Arguments.of(Arrays.asList(true, false), true),
+ // two levels
+ Arguments.of(Arrays.asList(false, false, false), false),
+ Arguments.of(Arrays.asList(false, false, true), true),
+ Arguments.of(Arrays.asList(true, false, false), true),
+ Arguments.of(Arrays.asList(false, true, false), true),
+ // three levels
+ Arguments.of(Arrays.asList(false, false, false, false), false),
+ Arguments.of(Arrays.asList(false, false, false, true), true),
+ Arguments.of(Arrays.asList(true, false, false, false), true),
+ Arguments.of(Arrays.asList(false, true, false, false), true),
+ // four levels
+ Arguments.of(Arrays.asList(false, false, false, false, false), false),
+ Arguments.of(Arrays.asList(false, false, false, false, true), true),
+ Arguments.of(Arrays.asList(true, false, false, false, false), true),
+ Arguments.of(Arrays.asList(false, true, true, true, false), true));
+ }
+
+ @ParameterizedTest
+ @MethodSource("producesNullCases")
+ public void testProducesNull(List<Boolean> optionalList, boolean
expectedProducesNull) {
+ String leafName = "leaf";
+ Schema schema = buildSchemaFromOptionalList(optionalList, leafName);
+ int leafId = optionalList.size();
+ Types.NestedField leafField = schema.findField(leafId);
+ Accessor<StructLike> accessor = schema.accessorForField(leafId);
+
+ BoundReference<Integer> ref = new BoundReference<>(leafField, accessor,
leafName);
+ assertThat(ref.producesNull()).isEqualTo(expectedProducesNull);
+ }
+}
diff --git
a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
index f54ddfcc5a..694c72876c 100644
---
a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
+++
b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
@@ -297,9 +297,7 @@ public class TestBloomRowGroupFilter {
shouldRead =
new ParquetBloomRowGroupFilter(SCHEMA,
notNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
- assertThat(shouldRead)
- .as("Should read: this field is required and are always not-null")
- .isTrue();
+ assertThat(shouldRead).as("Should read: bloom filter doesn't
help").isTrue();
}
@Test
@@ -323,8 +321,8 @@ public class TestBloomRowGroupFilter {
new ParquetBloomRowGroupFilter(SCHEMA,
isNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
- .as("Should skip: this field is required and are always not-null")
- .isFalse();
+ .as("Should read: required nested field can still be null if any
ancestor is optional")
+ .isTrue();
}
@Test
diff --git
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
index ab15139820..a21dfd388d 100644
---
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
+++
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
@@ -613,4 +613,21 @@ public class TestSelect extends CatalogTestBase {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)),
result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}
+
+ @TestTemplate
+ public void testRequiredNestedFieldInOptionalStructFilter() {
+ String nestedStructTable = tableName("nested_struct_table");
+ sql(
+ "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT
NULL>) "
+ + "USING iceberg",
+ nestedStructTable);
+ sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
+ sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);
+
+ List<Object[]> result =
+ sql("SELECT id FROM %s WHERE address.street IS NULL",
nestedStructTable);
+
+ assertEquals("Should return all expected rows", ImmutableList.of(row(0)),
result);
+ sql("DROP TABLE IF EXISTS %s", nestedStructTable);
+ }
}
diff --git
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
index 240622750f..68b93be479 100644
---
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
+++
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
@@ -613,4 +613,21 @@ public class TestSelect extends CatalogTestBase {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)),
result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}
+
+ @TestTemplate
+ public void testRequiredNestedFieldInOptionalStructFilter() {
+ String nestedStructTable = tableName("nested_struct_table");
+ sql(
+ "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT
NULL>) "
+ + "USING iceberg",
+ nestedStructTable);
+ sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
+ sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);
+
+ List<Object[]> result =
+ sql("SELECT id FROM %s WHERE address.street IS NULL",
nestedStructTable);
+
+ assertEquals("Should return all expected rows", ImmutableList.of(row(0)),
result);
+ sql("DROP TABLE IF EXISTS %s", nestedStructTable);
+ }
}
diff --git
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
index 240622750f..68b93be479 100644
---
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
+++
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
@@ -613,4 +613,21 @@ public class TestSelect extends CatalogTestBase {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)),
result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}
+
+ @TestTemplate
+ public void testRequiredNestedFieldInOptionalStructFilter() {
+ String nestedStructTable = tableName("nested_struct_table");
+ sql(
+ "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT
NULL>) "
+ + "USING iceberg",
+ nestedStructTable);
+ sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
+ sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);
+
+ List<Object[]> result =
+ sql("SELECT id FROM %s WHERE address.street IS NULL",
nestedStructTable);
+
+ assertEquals("Should return all expected rows", ImmutableList.of(row(0)),
result);
+ sql("DROP TABLE IF EXISTS %s", nestedStructTable);
+ }
}