This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch 1.10.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/1.10.x by this push:
new b5e921a47d API: Detect whether required fields nested within optionals
can produce nulls (#14270) (#14512)
b5e921a47d is described below
commit b5e921a47d4e3fc4ab99e52dd8e35a06a0491cb6
Author: Huaxin Gao <[email protected]>
AuthorDate: Wed Nov 5 22:48:49 2025 -0800
API: Detect whether required fields nested within optionals can produce
nulls (#14270) (#14512)
* API: Detect whether required fields nested within optionals can produce
nulls
This partially reverts some changes around the `Accessor` API that were
introduced by https://github.com/apache/iceberg/pull/13804 and uses a Schema
visitor
to detect whether any of the parent fields of a nested required field are
optional.
This info is then used when IS_NULL / NOT_NULL is evaluated
* only check parent fields on IS_NULL/NOT_NULL
(cherry picked from commit a473b1c6e03f43afa2006fb7e8fc4d2ee969b751)
Co-authored-by: Eduard Tudenhoefner <[email protected]>
---
api/src/main/java/org/apache/iceberg/Accessor.java | 5 -
.../main/java/org/apache/iceberg/Accessors.java | 50 +++-------
.../apache/iceberg/expressions/BoundReference.java | 4 +-
.../iceberg/expressions/UnboundPredicate.java | 16 ++-
.../java/org/apache/iceberg/types/TypeUtil.java | 24 +++++
.../iceberg/expressions/TestBoundReference.java | 108 --------------------
.../iceberg/expressions/TestExpressionBinding.java | 88 +++++++++++++++++
.../expressions/TestInclusiveMetricsEvaluator.java | 109 ++++++++++++++++++++-
.../org/apache/iceberg/types/TestTypeUtil.java | 96 ++++++++++++++++++
9 files changed, 340 insertions(+), 160 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java
b/api/src/main/java/org/apache/iceberg/Accessor.java
index 20b09bf910..2a20a04df9 100644
--- a/api/src/main/java/org/apache/iceberg/Accessor.java
+++ b/api/src/main/java/org/apache/iceberg/Accessor.java
@@ -25,9 +25,4 @@ public interface Accessor<T> extends Serializable {
Object get(T container);
Type type();
-
- /** Returns true if the current field or any ancestor in the access path is
optional. */
- default boolean hasOptionalFieldInPath() {
- return false;
- }
}
diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java
b/api/src/main/java/org/apache/iceberg/Accessors.java
index 06ee0a916c..0b36730fbb 100644
--- a/api/src/main/java/org/apache/iceberg/Accessors.java
+++ b/api/src/main/java/org/apache/iceberg/Accessors.java
@@ -59,13 +59,11 @@ public class Accessors {
private final int position;
private final Type type;
private final Class<?> javaClass;
- private final boolean hasOptionalFieldInPath;
- PositionAccessor(int pos, Type type, boolean isOptional) {
+ PositionAccessor(int pos, Type type) {
this.position = pos;
this.type = type;
this.javaClass = type.typeId().javaClass();
- this.hasOptionalFieldInPath = isOptional;
}
@Override
@@ -86,11 +84,6 @@ public class Accessors {
return javaClass;
}
- @Override
- public boolean hasOptionalFieldInPath() {
- return hasOptionalFieldInPath;
- }
-
@Override
public String toString() {
return "Accessor(positions=[" + position + "], type=" + type + ")";
@@ -102,14 +95,12 @@ public class Accessors {
private final int p1;
private final Type type;
private final Class<?> javaClass;
- private final boolean hasOptionalFieldInPath;
- Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
+ Position2Accessor(int pos, PositionAccessor wrapped) {
this.p0 = pos;
this.p1 = wrapped.position();
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
- this.hasOptionalFieldInPath = isOptional ||
wrapped.hasOptionalFieldInPath();
}
@Override
@@ -126,11 +117,6 @@ public class Accessors {
return javaClass;
}
- @Override
- public boolean hasOptionalFieldInPath() {
- return hasOptionalFieldInPath;
- }
-
@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
@@ -143,15 +129,13 @@ public class Accessors {
private final int p2;
private final Type type;
private final Class<?> javaClass;
- private final boolean hasOptionalFieldInPath;
- Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
+ Position3Accessor(int pos, Position2Accessor wrapped) {
this.p0 = pos;
this.p1 = wrapped.p0;
this.p2 = wrapped.p1;
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
- this.hasOptionalFieldInPath = isOptional ||
wrapped.hasOptionalFieldInPath();
}
@Override
@@ -164,11 +148,6 @@ public class Accessors {
return type;
}
- @Override
- public boolean hasOptionalFieldInPath() {
- return hasOptionalFieldInPath;
- }
-
@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type="
+ type + ")";
@@ -178,12 +157,10 @@ public class Accessors {
private static class WrappedPositionAccessor implements Accessor<StructLike>
{
private final int position;
private final Accessor<StructLike> accessor;
- private final boolean hasOptionalFieldInPath;
- WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean
isOptional) {
+ WrappedPositionAccessor(int pos, Accessor<StructLike> accessor) {
this.position = pos;
this.accessor = accessor;
- this.hasOptionalFieldInPath = isOptional ||
accessor.hasOptionalFieldInPath();
}
@Override
@@ -200,32 +177,27 @@ public class Accessors {
return accessor.type();
}
- @Override
- public boolean hasOptionalFieldInPath() {
- return hasOptionalFieldInPath;
- }
-
@Override
public String toString() {
return "WrappedAccessor(position=" + position + ", wrapped=" + accessor
+ ")";
}
}
- private static Accessor<StructLike> newAccessor(int pos, boolean isOptional,
Type type) {
- return new PositionAccessor(pos, type, isOptional);
+ private static Accessor<StructLike> newAccessor(int pos, Type type) {
+ return new PositionAccessor(pos, type);
}
private static Accessor<StructLike> newAccessor(
int pos, boolean isOptional, Accessor<StructLike> accessor) {
if (isOptional) {
// the wrapped position handles null layers
- return new WrappedPositionAccessor(pos, accessor, isOptional);
+ return new WrappedPositionAccessor(pos, accessor);
} else if (accessor.getClass() == PositionAccessor.class) {
- return new Position2Accessor(pos, (PositionAccessor) accessor,
isOptional);
+ return new Position2Accessor(pos, (PositionAccessor) accessor);
} else if (accessor instanceof Position2Accessor) {
- return new Position3Accessor(pos, (Position2Accessor) accessor,
isOptional);
+ return new Position3Accessor(pos, (Position2Accessor) accessor);
} else {
- return new WrappedPositionAccessor(pos, accessor, isOptional);
+ return new WrappedPositionAccessor(pos, accessor);
}
}
@@ -254,7 +226,7 @@ public class Accessors {
}
// Add an accessor for this field as an Object (may or may not be
primitive).
- accessors.put(field.fieldId(), newAccessor(i, field.isOptional(),
field.type()));
+ accessors.put(field.fieldId(), newAccessor(i, field.type()));
}
return accessors;
diff --git
a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
index decda85f2e..0295fe518f 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
@@ -57,9 +57,7 @@ public class BoundReference<T> implements BoundTerm<T>,
Reference<T> {
@Override
public boolean producesNull() {
- // A leaf required field can evaluate to null if it is optional itself or
any
- // ancestor on the path is optional.
- return accessor.hasOptionalFieldInPath();
+ return field.isOptional();
}
@Override
diff --git
a/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
b/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
index 4736ca4a86..75ca9d5835 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java
@@ -27,6 +27,7 @@ import
org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.util.CharSequenceSet;
@@ -112,7 +113,7 @@ public class UnboundPredicate<T> extends Predicate<T,
UnboundTerm<T>>
BoundTerm<T> bound = term().bind(struct, caseSensitive);
if (literals == null) {
- return bindUnaryOperation(bound);
+ return bindUnaryOperation(struct, bound);
}
if (op() == Operation.IN || op() == Operation.NOT_IN) {
@@ -122,17 +123,19 @@ public class UnboundPredicate<T> extends Predicate<T,
UnboundTerm<T>>
return bindLiteralOperation(bound);
}
- private Expression bindUnaryOperation(BoundTerm<T> boundTerm) {
+ private Expression bindUnaryOperation(StructType struct, BoundTerm<T>
boundTerm) {
switch (op()) {
case IS_NULL:
- if (!boundTerm.producesNull()) {
+ if (!boundTerm.producesNull()
+ && allAncestorFieldsAreRequired(struct,
boundTerm.ref().fieldId())) {
return Expressions.alwaysFalse();
} else if (boundTerm.type().equals(Types.UnknownType.get())) {
return Expressions.alwaysTrue();
}
return new BoundUnaryPredicate<>(Operation.IS_NULL, boundTerm);
case NOT_NULL:
- if (!boundTerm.producesNull()) {
+ if (!boundTerm.producesNull()
+ && allAncestorFieldsAreRequired(struct,
boundTerm.ref().fieldId())) {
return Expressions.alwaysTrue();
} else if (boundTerm.type().equals(Types.UnknownType.get())) {
return Expressions.alwaysFalse();
@@ -155,6 +158,11 @@ public class UnboundPredicate<T> extends Predicate<T,
UnboundTerm<T>>
}
}
+ private boolean allAncestorFieldsAreRequired(StructType struct, int fieldId)
{
+ return TypeUtil.ancestorFields(struct.asSchema(), fieldId).stream()
+ .allMatch(Types.NestedField::isRequired);
+ }
+
private boolean floatingType(Type.TypeID typeID) {
return Type.TypeID.DOUBLE.equals(typeID) ||
Type.TypeID.FLOAT.equals(typeID);
}
diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
index 486b1d695b..b1c556be06 100644
--- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
+++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -222,6 +222,30 @@ public class TypeUtil {
return ImmutableMap.copyOf(visit(struct, new IndexParents()));
}
+ /**
+ * Searches in the given schema for all ancestor fields of the given field
ID. If the field ID is
+ * defined in a nested type, then all of its ancestor fields are returned.
If the field ID is not
+ * nested, an empty list is returned.
+ *
+ * @param schema The schema to search for the field ID
+ * @param fieldId The field ID to find the parents of
+ * @return A list of all ancestor fields of the given field ID if the field
ID points to a nested
+ * field. If the field ID is not a nested field, then an empty list is
returned.
+ */
+ public static List<Types.NestedField> ancestorFields(Schema schema, int
fieldId) {
+ Map<Integer, Integer> idToParent =
TypeUtil.indexParents(schema.asStruct());
+ List<Types.NestedField> parents = Lists.newArrayList();
+ if (idToParent.containsKey(fieldId)) {
+ Integer parentId = idToParent.get(fieldId);
+ while (parentId != null) {
+ parents.add(schema.findField(parentId));
+ parentId = idToParent.get(parentId);
+ }
+ }
+
+ return parents;
+ }
+
/**
* Assigns fresh ids from the {@link NextID nextId function} for all fields
in a type.
*
diff --git
a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
deleted file mode 100644
index ed921b248f..0000000000
--- a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.expressions;
-
-import static org.apache.iceberg.types.Types.NestedField.optional;
-import static org.apache.iceberg.types.Types.NestedField.required;
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Stream;
-import org.apache.iceberg.Accessor;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.StructLike;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.types.Types;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.MethodSource;
-
-public class TestBoundReference {
- // Build a schema with a single nested struct with optionalList.size()
levels with the following
- // structure:
- // s1: struct(s2: struct(s3: struct(..., sn: struct(leaf: int))))
- // where each s{i} is an optional struct if optionalList.get(i) is true and
a required struct if
- // false
- private static Schema buildSchemaFromOptionalList(List<Boolean>
optionalList, String leafName) {
- Preconditions.checkArgument(
- optionalList != null && !optionalList.isEmpty(), "optionalList must
not be null or empty");
- Types.NestedField leaf =
- optionalList.get(optionalList.size() - 1)
- ? optional(optionalList.size(), leafName, Types.IntegerType.get())
- : required(optionalList.size(), leafName, Types.IntegerType.get());
-
- Types.StructType current = Types.StructType.of(leaf);
-
- for (int i = optionalList.size() - 2; i >= 0; i--) {
- int id = i + 1;
- String name = "s" + (i + 1);
- current =
- Types.StructType.of(
- optionalList.get(i) ? optional(id, name, current) : required(id,
name, current));
- }
-
- return new Schema(current.fields());
- }
-
- private static Stream<Arguments> producesNullCases() {
- // the test cases specify two arguments:
- // - the first is a list of booleans that indicate whether fields in the
nested sequence of
- // structs are optional or required. For example, [false, true, false]
will construct a
- // struct like s1.s2.s3 with s1 being required, s2 being optional, and
s3 being required.
- // - the second is a boolean that indicates whether calling producesNull()
on the BoundReference
- // of the leaf field should return true or false.
- return Stream.of(
- // basic fields, no struct levels
- Arguments.of(Arrays.asList(false), false),
- Arguments.of(Arrays.asList(true), true),
- // one level
- Arguments.of(Arrays.asList(false, false), false),
- Arguments.of(Arrays.asList(false, true), true),
- Arguments.of(Arrays.asList(true, false), true),
- // two levels
- Arguments.of(Arrays.asList(false, false, false), false),
- Arguments.of(Arrays.asList(false, false, true), true),
- Arguments.of(Arrays.asList(true, false, false), true),
- Arguments.of(Arrays.asList(false, true, false), true),
- // three levels
- Arguments.of(Arrays.asList(false, false, false, false), false),
- Arguments.of(Arrays.asList(false, false, false, true), true),
- Arguments.of(Arrays.asList(true, false, false, false), true),
- Arguments.of(Arrays.asList(false, true, false, false), true),
- // four levels
- Arguments.of(Arrays.asList(false, false, false, false, false), false),
- Arguments.of(Arrays.asList(false, false, false, false, true), true),
- Arguments.of(Arrays.asList(true, false, false, false, false), true),
- Arguments.of(Arrays.asList(false, true, true, true, false), true));
- }
-
- @ParameterizedTest
- @MethodSource("producesNullCases")
- public void testProducesNull(List<Boolean> optionalList, boolean
expectedProducesNull) {
- String leafName = "leaf";
- Schema schema = buildSchemaFromOptionalList(optionalList, leafName);
- int leafId = optionalList.size();
- Types.NestedField leafField = schema.findField(leafId);
- Accessor<StructLike> accessor = schema.accessorForField(leafId);
-
- BoundReference<Integer> ref = new BoundReference<>(leafField, accessor,
leafName);
- assertThat(ref.producesNull()).isEqualTo(expectedProducesNull);
- }
-}
diff --git
a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
index 5293681f6f..24e58ad1e8 100644
---
a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
+++
b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
@@ -38,13 +38,20 @@ import static
org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+import org.apache.iceberg.Schema;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.StructType;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.FieldSource;
+import org.junit.jupiter.params.provider.MethodSource;
public class TestExpressionBinding {
private static final StructType STRUCT =
@@ -421,4 +428,85 @@ public class TestExpressionBinding {
assertThat(pred.term()).as("Should use a
BoundExtract").isInstanceOf(BoundExtract.class);
assertThat(pred.term().type()).isEqualTo(Types.fromPrimitiveString(typeName));
}
+
+ private static Stream<Arguments> nullCasesWithNestedStructs() {
+ // the test cases specify two arguments:
+ // - the first is a list of booleans that indicate whether fields in the
nested sequence of
+ // structs are optional or required. For example, [true, false, true]
will construct a
+ // struct like s1.s2.s3 with s1 being required, s2 being optional, and
s3 being required.
+ // - the second is an expression that indicates what is expected.
+ return Stream.of(
+ // basic fields, no struct levels
+ Arguments.of(Arrays.asList(true), Expressions.alwaysFalse()),
+ Arguments.of(Arrays.asList(false), Expressions.isNull("leaf")),
+ // one level
+ Arguments.of(Arrays.asList(true, true), Expressions.alwaysFalse()),
+ Arguments.of(Arrays.asList(true, false), Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(false, true), Expressions.isNull("leaf")),
+ // two levels
+ Arguments.of(Arrays.asList(true, true, true),
Expressions.alwaysFalse()),
+ Arguments.of(Arrays.asList(true, true, false),
Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(false, true, true),
Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(true, false, true),
Expressions.isNull("leaf")),
+ // three levels
+ Arguments.of(Arrays.asList(true, true, true, true),
Expressions.alwaysFalse()),
+ Arguments.of(Arrays.asList(true, true, true, false),
Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(false, true, true, true),
Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(true, false, true, true),
Expressions.isNull("leaf")),
+ // four levels
+ Arguments.of(Arrays.asList(true, true, true, true, true),
Expressions.alwaysFalse()),
+ Arguments.of(Arrays.asList(true, true, true, true, false),
Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(false, true, true, true, true),
Expressions.isNull("leaf")),
+ Arguments.of(Arrays.asList(true, false, false, false, true),
Expressions.isNull("leaf")));
+ }
+
+ private Schema buildNestedSchema(List<Boolean> requiredFields, String
leafName) {
+ // Build a schema with a single nested struct with requiredFields.size()
levels with the
+ // following structure:
+ // s1: struct(s2: struct(s3: struct(..., sn: struct(leaf: int))))
+ // where each s{i} is a required struct if requiredFields.get(i) is true
and an optional struct
+ // if false
+ Preconditions.checkArgument(
+ requiredFields != null && !requiredFields.isEmpty(),
+ "Invalid required fields: null or empty");
+ Types.NestedField leaf =
+ requiredFields.get(requiredFields.size() - 1)
+ ? required(requiredFields.size(), leafName,
Types.IntegerType.get())
+ : optional(requiredFields.size(), leafName,
Types.IntegerType.get());
+
+ Types.StructType current = Types.StructType.of(leaf);
+
+ for (int i = requiredFields.size() - 2; i >= 0; i--) {
+ int id = i + 1;
+ String name = "s" + (i + 1);
+ current =
+ Types.StructType.of(
+ requiredFields.get(i) ? required(id, name, current) :
optional(id, name, current));
+ }
+
+ return new Schema(current.fields());
+ }
+
+ @ParameterizedTest
+ @MethodSource("nullCasesWithNestedStructs")
+ public void testIsNullWithNestedStructs(List<Boolean> requiredFields,
Expression expression) {
+ String leafName = "leaf";
+ Schema schema = buildNestedSchema(requiredFields, leafName);
+ int leafId = requiredFields.size();
+ int level = 1;
+ StringBuilder pathBuilder = new StringBuilder();
+ while (level < leafId) {
+ pathBuilder.append("s").append(level).append(".");
+ level++;
+ }
+
+ String path = pathBuilder.append(leafName).toString();
+ Expression bound = Binder.bind(schema.asStruct(), isNull(path));
+ TestHelpers.assertAllReferencesBound("IsNull", bound);
+ assertThat(bound.op()).isEqualTo(expression.op());
+
+ bound = Binder.bind(schema.asStruct(), Expressions.notNull(path));
+ TestHelpers.assertAllReferencesBound("NotNull", bound);
+ assertThat(bound.op()).isEqualTo(expression.negate().op());
+ }
}
diff --git
a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
index 7069d891c3..2f4fbf3957 100644
---
a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
+++
b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
@@ -73,6 +73,21 @@ public class TestInclusiveMetricsEvaluator {
optional(13, "no_nan_stats", Types.DoubleType.get()),
optional(14, "some_empty", Types.StringType.get()));
+ private static final Schema NESTED_SCHEMA =
+ new Schema(
+ required(
+ 100,
+ "required_address",
+ Types.StructType.of(
+ required(102, "required_street1", Types.StringType.get()),
+ optional(103, "optional_street1", Types.StringType.get()))),
+ optional(
+ 101,
+ "optional_address",
+ Types.StructType.of(
+ required(104, "required_street2", Types.StringType.get()),
+ optional(105, "optional_street2", Types.StringType.get()))));
+
private static final int INT_MIN_VALUE = 30;
private static final int INT_MAX_VALUE = 79;
@@ -173,7 +188,7 @@ public class TestInclusiveMetricsEvaluator {
private static final DataFile FILE_5 =
new TestDataFile(
- "file_4.avro",
+ "file_5.avro",
Row.of(),
50,
// any value counts, including nulls
@@ -187,6 +202,22 @@ public class TestInclusiveMetricsEvaluator {
// upper bounds
ImmutableMap.of(3, toByteBuffer(StringType.get(), "abcdefghi")));
+ private static final DataFile FILE_6 =
+ new TestDataFile(
+ "file_6.avro",
+ Row.of(),
+ 10,
+ // any value counts, including nulls
+ ImmutableMap.of(100, 5L, 101, 5L, 102, 5L, 103, 5L, 104, 5L, 105,
5L),
+ // null value counts
+ ImmutableMap.of(100, 0L, 101, 5L, 103, 5L, 104, 5L, 105, 5L),
+ // nan value counts
+ null,
+ // lower bounds
+ null,
+ // upper bounds
+ null);
+
@Test
public void testAllNulls() {
boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA,
notNull("all_nulls")).eval(FILE);
@@ -863,4 +894,80 @@ public class TestInclusiveMetricsEvaluator {
shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notIn("no_nulls",
"abc", "def")).eval(FILE);
assertThat(shouldRead).as("Should read: notIn on no nulls
column").isTrue();
}
+
+ @Test
+ public void testIsNullInNestedStruct() {
+ // read required_address and its nested fields
+ boolean shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
isNull("required_address")).eval(FILE_6);
+ assertThat(shouldRead).as("Should not read: required_address is
required").isFalse();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
isNull("required_address.required_street1"))
+ .eval(FILE_6);
+ assertThat(shouldRead)
+ .as("Should not read: required_address.required_street1 is required")
+ .isFalse();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
isNull("required_address.optional_street1"))
+ .eval(FILE_6);
+ assertThat(shouldRead)
+ .as("Should read: required_address.optional_street1 is optional")
+ .isTrue();
+
+ // read optional_address and its nested fields
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
isNull("optional_address")).eval(FILE_6);
+ assertThat(shouldRead).as("Should read: optional_address is
optional").isTrue();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
isNull("optional_address.required_street2"))
+ .eval(FILE_6);
+ assertThat(shouldRead).as("Should read: optional_address is
optional").isTrue();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
isNull("optional_address.optional_street2"))
+ .eval(FILE_6);
+ assertThat(shouldRead).as("Should read: optional_address is
optional").isTrue();
+ }
+
+ @Test
+ public void testNotNullInNestedStruct() {
+ // read required_address and its nested fields
+ boolean shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
notNull("required_address")).eval(FILE_6);
+ assertThat(shouldRead).as("Should read: required_address is
required").isTrue();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
notNull("required_address.required_street1"))
+ .eval(FILE_6);
+ assertThat(shouldRead)
+ .as("Should read: required_address.required_street1 is required")
+ .isTrue();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
notNull("required_address.optional_street1"))
+ .eval(FILE_6);
+ assertThat(shouldRead)
+ .as("Should not read: required_address.optional_street1 is optional")
+ .isFalse();
+
+ // read optional_address and its nested fields
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
notNull("optional_address")).eval(FILE_6);
+ assertThat(shouldRead).as("Should not read: optional_address is
optional").isFalse();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
notNull("optional_address.required_street2"))
+ .eval(FILE_6);
+ assertThat(shouldRead).as("Should not read: optional_address is
optional").isFalse();
+
+ shouldRead =
+ new InclusiveMetricsEvaluator(NESTED_SCHEMA,
notNull("optional_address.optional_street2"))
+ .eval(FILE_6);
+ assertThat(shouldRead)
+ .as("Should not read: optional_address.optional_street2 is optional")
+ .isFalse();
+ }
}
diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
index 63f2027ab0..d4742f5187 100644
--- a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
+++ b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
@@ -849,4 +849,100 @@ public class TestTypeUtil {
Schema reassignedSchema = TypeUtil.reassignDoc(schema, docSourceSchema);
assertThat(reassignedSchema.asStruct()).isEqualTo(docSourceSchema.asStruct());
}
+
+ @Test
+ public void ancestorFieldsInEmptySchema() {
+ assertThat(TypeUtil.ancestorFields(new Schema(), -1)).isEmpty();
+ assertThat(TypeUtil.ancestorFields(new Schema(), 1)).isEmpty();
+ }
+
+ @Test
+ public void ancestorFieldsInNonNestedSchema() {
+ Schema schema =
+ new Schema(
+ required(0, "a", Types.IntegerType.get()), required(1, "A",
Types.IntegerType.get()));
+
+ assertThat(TypeUtil.ancestorFields(schema, 0)).isEmpty();
+ assertThat(TypeUtil.ancestorFields(schema, 1)).isEmpty();
+ }
+
+ @Test
+ public void ancestorFieldsInNestedSchema() {
+ Types.NestedField innerPreferences =
+ optional(
+ 8,
+ "inner_preferences",
+ Types.StructType.of(
+ required(12, "feature3", Types.BooleanType.get()),
+ optional(13, "feature4", Types.BooleanType.get())));
+ Types.NestedField preferences =
+ optional(
+ 3,
+ "preferences",
+ Types.StructType.of(
+ required(6, "feature1", Types.BooleanType.get()),
+ optional(7, "feature2", Types.BooleanType.get()),
+ innerPreferences));
+
+ // define locations map with all nested fields
+ Types.StructType locationsKeyStruct =
+ Types.StructType.of(
+ required(20, "address", Types.StringType.get()),
+ required(21, "city", Types.StringType.get()),
+ required(22, "state", Types.StringType.get()),
+ required(23, "zip", IntegerType.get()));
+ Types.StructType locationsValueStruct =
+ Types.StructType.of(
+ required(14, "lat", Types.FloatType.get()),
+ required(15, "long", Types.FloatType.get()));
+ Types.NestedField locationsKey = required(9, "key", locationsKeyStruct);
+ Types.NestedField locationsValue = required(10, "value",
locationsValueStruct);
+ Types.NestedField locations =
+ required(
+ 4,
+ "locations",
+ Types.MapType.ofRequired(9, 10, locationsKeyStruct,
locationsValueStruct));
+
+ // define points list with all nested fields
+ Types.StructType pointsStruct =
+ Types.StructType.of(
+ required(16, "x", Types.LongType.get()), required(17, "y",
Types.LongType.get()));
+ Types.NestedField pointsElement = optional(11, "element", pointsStruct);
+ Types.NestedField points = optional(5, "points",
Types.ListType.ofOptional(11, pointsStruct));
+
+ Types.NestedField id = required(1, "id", IntegerType.get());
+ Types.NestedField data = optional(2, "data", Types.StringType.get());
+ Schema schema = new Schema(id, data, preferences, locations, points);
+
+ // non-nested fields don't have parents
+ assertThat(TypeUtil.ancestorFields(schema, id.fieldId())).isEmpty();
+ assertThat(TypeUtil.ancestorFields(schema, data.fieldId())).isEmpty();
+
+ // verify preferences struct and all of its nested fields (6-8, 12+13)
+ assertThat(TypeUtil.ancestorFields(schema,
preferences.fieldId())).isEmpty();
+ assertThat(TypeUtil.ancestorFields(schema,
6)).containsExactly(preferences);
+ assertThat(TypeUtil.ancestorFields(schema,
7)).containsExactly(preferences);
+ assertThat(TypeUtil.ancestorFields(schema, innerPreferences.fieldId()))
+ .containsExactly(preferences);
+ assertThat(TypeUtil.ancestorFields(schema,
12)).containsExactly(innerPreferences, preferences);
+ assertThat(TypeUtil.ancestorFields(schema,
13)).containsExactly(innerPreferences, preferences);
+
+ // verify locations map and all of its nested fields (IDs 9+10, 20-23 and
14+15)
+ assertThat(TypeUtil.ancestorFields(schema, locations.fieldId())).isEmpty();
+ assertThat(TypeUtil.ancestorFields(schema,
locationsKey.fieldId())).containsExactly(locations);
+ assertThat(TypeUtil.ancestorFields(schema,
20)).containsExactly(locationsKey, locations);
+ assertThat(TypeUtil.ancestorFields(schema,
21)).containsExactly(locationsKey, locations);
+ assertThat(TypeUtil.ancestorFields(schema,
22)).containsExactly(locationsKey, locations);
+ assertThat(TypeUtil.ancestorFields(schema,
23)).containsExactly(locationsKey, locations);
+ assertThat(TypeUtil.ancestorFields(schema, locationsValue.fieldId()))
+ .containsExactly(locations);
+ assertThat(TypeUtil.ancestorFields(schema,
14)).containsExactly(locationsValue, locations);
+ assertThat(TypeUtil.ancestorFields(schema,
15)).containsExactly(locationsValue, locations);
+
+ // verify points list and all of its nested fields (IDs 11 and 16+17)
+ assertThat(TypeUtil.ancestorFields(schema, points.fieldId())).isEmpty();
+ assertThat(TypeUtil.ancestorFields(schema,
pointsElement.fieldId())).containsExactly(points);
+ assertThat(TypeUtil.ancestorFields(schema,
16)).containsExactly(pointsElement, points);
+ assertThat(TypeUtil.ancestorFields(schema,
17)).containsExactly(pointsElement, points);
+ }
}