This is an automated email from the ASF dual-hosted git repository.

russellspitzer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 157b2488eb API: Optimize NOT IN and != predicates for single-value partition manifests (#15064)
157b2488eb is described below

commit 157b2488ebfc138370bf482458df96bcdc7245dd
Author: Joy Haldar <[email protected]>
AuthorDate: Fri Jan 23 00:31:33 2026 +0530

    API: Optimize NOT IN and != predicates for single-value partition manifests (#15064)
---
 .../iceberg/expressions/ManifestEvaluator.java     |  53 +++++++++++
 .../TestInclusiveManifestEvaluator.java            | 104 ++++++++++++++++++++-
 2 files changed, 155 insertions(+), 2 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java
index fc3d394203..21762946ca 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java
@@ -269,6 +269,14 @@ public class ManifestEvaluator {
     public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
       // because the bounds are not necessarily a min or max value, this 
cannot be answered using
       // them. notEq(col, X) with (X, Y) doesn't guarantee that X is a value 
in col.
+      // However, when lower == upper and the manifest has no nulls or NaN 
values, we can safely
+      // prune if that value equals the literal.
+      T value = uniqueValue(ref);
+
+      if (value != null && lit.comparator().compare(value, lit.value()) == 0) {
+        return ROWS_CANNOT_MATCH;
+      }
+
       return ROWS_MIGHT_MATCH;
     }
 
@@ -313,6 +321,14 @@ public class ManifestEvaluator {
     public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
       // because the bounds are not necessarily a min or max value, this 
cannot be answered using
       // them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a 
value in col.
+      // However, when lower == upper and the manifest has no nulls or NaN 
values, we can safely
+      // prune if that value is in the exclusion set.
+      T value = uniqueValue(ref);
+
+      if (value != null && literalSet.contains(value)) {
+        return ROWS_CANNOT_MATCH;
+      }
+
       return ROWS_MIGHT_MATCH;
     }
 
@@ -400,6 +416,43 @@ public class ManifestEvaluator {
       return ROWS_MIGHT_MATCH;
     }
 
+    /**
+     * Returns the partition field's single value if all partitions contain 
the same value. Defined
+     * as a partition field with no nulls, no NaNs (for floating-point types), 
and lower bound
+     * equals upper bound. Returns null otherwise.
+     */
+    private <T> T uniqueValue(BoundReference<T> ref) {
+      int pos = Accessors.toPosition(ref.accessor());
+      PartitionFieldSummary fieldStats = stats.get(pos);
+
+      if (fieldStats.containsNull()) {
+        return null;
+      }
+
+      Type.TypeID typeId = ref.type().typeId();
+      if (Type.TypeID.FLOAT.equals(typeId) || 
Type.TypeID.DOUBLE.equals(typeId)) {
+        if (fieldStats.containsNaN() == null || fieldStats.containsNaN()) {
+          return null;
+        }
+      }
+
+      ByteBuffer lowerBound = fieldStats.lowerBound();
+      ByteBuffer upperBound = fieldStats.upperBound();
+
+      if (lowerBound == null || upperBound == null) {
+        return null;
+      }
+
+      T lower = Conversions.fromByteBuffer(ref.type(), lowerBound);
+      T upper = Conversions.fromByteBuffer(ref.type(), upperBound);
+
+      if (ref.comparator().compare(lower, upper) != 0) {
+        return null;
+      }
+
+      return lower;
+    }
+
     private boolean allValuesAreNull(PartitionFieldSummary summary, 
Type.TypeID typeId) {
       // containsNull encodes whether at least one partition value is null,
       // lowerBound is null if all partition values are null
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java
index 6c4944e9cd..78e6064eb4 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java
@@ -66,7 +66,10 @@ public class TestInclusiveManifestEvaluator {
           optional(12, "no_nan_or_null", Types.DoubleType.get()),
           optional(13, "all_nulls_missing_nan_float", Types.FloatType.get()),
           optional(14, "all_same_value_or_null", Types.StringType.get()),
-          optional(15, "no_nulls_same_value_a", Types.StringType.get()));
+          optional(15, "no_nulls_same_value_a", Types.StringType.get()),
+          optional(16, "single_value_with_nan", Types.FloatType.get()),
+          optional(17, "single_value_nan_unknown", Types.FloatType.get()),
+          optional(18, "single_value_no_nan", Types.FloatType.get()));
 
   private static final PartitionSpec SPEC =
       PartitionSpec.builderFor(SCHEMA)
@@ -84,6 +87,9 @@ public class TestInclusiveManifestEvaluator {
           .identity("all_nulls_missing_nan_float")
           .identity("all_same_value_or_null")
           .identity("no_nulls_same_value_a")
+          .identity("single_value_with_nan")
+          .identity("single_value_nan_unknown")
+          .identity("single_value_no_nan")
           .build();
 
   private static final int INT_MIN_VALUE = 30;
@@ -128,7 +134,21 @@ public class TestInclusiveManifestEvaluator {
                   toByteBuffer(Types.FloatType.get(), 20F)),
               new TestHelpers.TestFieldSummary(true, null, null),
               new TestHelpers.TestFieldSummary(true, STRING_MIN, STRING_MIN),
-              new TestHelpers.TestFieldSummary(false, STRING_MIN, STRING_MIN)),
+              new TestHelpers.TestFieldSummary(false, STRING_MIN, STRING_MIN),
+              new TestHelpers.TestFieldSummary(
+                  false,
+                  true,
+                  toByteBuffer(Types.FloatType.get(), 5.0F),
+                  toByteBuffer(Types.FloatType.get(), 5.0F)),
+              new TestHelpers.TestFieldSummary(
+                  false,
+                  toByteBuffer(Types.FloatType.get(), 5.0F),
+                  toByteBuffer(Types.FloatType.get(), 5.0F)),
+              new TestHelpers.TestFieldSummary(
+                  false,
+                  false,
+                  toByteBuffer(Types.FloatType.get(), 5.0F),
+                  toByteBuffer(Types.FloatType.get(), 5.0F))),
           null);
 
   @Test
@@ -753,4 +773,84 @@ public class TestInclusiveManifestEvaluator {
         ManifestEvaluator.forRowFilter(notIn("no_nulls", "abc", "def"), SPEC, 
true).eval(FILE);
     assertThat(shouldRead).as("Should read: notIn on no nulls 
column").isTrue();
   }
+
+  @Test
+  public void testNotEqWithSingleValue() {
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("no_nulls", "a"), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: manifest has range of 
values").isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("no_nulls_same_value_a", "a"), 
SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: manifest contains single value equal to literal")
+        .isFalse();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("no_nulls_same_value_a", "b"), 
SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: manifest contains single value not equal to literal")
+        .isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("all_same_value_or_null", 
"a"), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: manifest has nulls which match != 
predicate").isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("single_value_with_nan", 
5.0F), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: manifest has NaN values which match != predicate")
+        .isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("single_value_nan_unknown", 
5.0F), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: manifest has unknown NaN 
info").isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notEqual("single_value_no_nan", 5.0F), 
SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: manifest contains single float value with no 
NaNs")
+        .isFalse();
+  }
+
+  @Test
+  public void testNotInWithSingleValue() {
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("no_nulls", "a", "b"), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: manifest has range of 
values").isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("no_nulls_same_value_a", "a", 
"b"), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: manifest contains single value in exclusion 
list")
+        .isFalse();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("no_nulls_same_value_a", "b", 
"c"), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: manifest contains single value not in exclusion 
list")
+        .isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("all_same_value_or_null", "a", 
"b"), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: manifest has nulls which match NOT IN predicate")
+        .isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("single_value_with_nan", 5.0F, 
10.0F), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: manifest has NaN values which match NOT IN 
predicate")
+        .isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("single_value_nan_unknown", 5.0F, 
10.0F), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: manifest has unknown NaN 
info").isTrue();
+    shouldRead =
+        ManifestEvaluator.forRowFilter(notIn("single_value_no_nan", 5.0F, 
10.0F), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: manifest contains single float value with no 
NaNs")
+        .isFalse();
+  }
 }

Reply via email to