(iceberg) branch main updated: API, Core: Update inclusive metrics evaluator for extract and transforms (#12311)

blue Tue, 25 Feb 2025 16:49:35 -0800

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git



The following commit(s) were added to refs/heads/main by this push:
     new 2f88ff66d0 API, Core: Update inclusive metrics evaluator for extract and transforms (#12311)
2f88ff66d0 is described below

commit 2f88ff66d05269b04e3621fe067ccdab668f3191
Author: Ryan Blue <[email protected]>
AuthorDate: Tue Feb 25 16:48:12 2025 -0800

    API, Core: Update inclusive metrics evaluator for extract and transforms (#12311)
---
 .../expressions/InclusiveMetricsEvaluator.java     | 473 ++++++++------
 .../iceberg/expressions/VariantExpressionUtil.java | 118 ++++
 .../TestInclusiveMetricsEvaluatorWithExtract.java  | 678 +++++++++++++++++++++
 ...estInclusiveMetricsEvaluatorWithTransforms.java | 661 ++++++++++++++++++++
 .../apache/iceberg/spark/source/TestSparkScan.java |  23 +-
 .../apache/iceberg/spark/source/TestSparkScan.java |  23 +-
 6 files changed, 1777 insertions(+), 199 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
index 172b6a727d..e667b096c2 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
@@ -29,12 +29,13 @@ import java.util.stream.Collectors;
 import org.apache.iceberg.ContentFile;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Schema;
-import 
org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
+import org.apache.iceberg.transforms.Transform;
 import org.apache.iceberg.types.Comparators;
 import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Types.StructType;
-import org.apache.iceberg.util.BinaryUtil;
 import org.apache.iceberg.util.NaNUtil;
+import org.apache.iceberg.variants.Variant;
+import org.apache.iceberg.variants.VariantObject;
 
 /**
  * Evaluates an {@link Expression} on a {@link DataFile} to test whether rows 
in the file may match.
@@ -79,7 +80,7 @@ public class InclusiveMetricsEvaluator {
   private static final boolean ROWS_MIGHT_MATCH = true;
   private static final boolean ROWS_CANNOT_MATCH = false;
 
-  private class MetricsEvalVisitor extends BoundExpressionVisitor<Boolean> {
+  private class MetricsEvalVisitor extends 
ExpressionVisitors.BoundVisitor<Boolean> {
     private Map<Integer, Long> valueCounts = null;
     private Map<Integer, Long> nullCounts = null;
     private Map<Integer, Long> nanCounts = null;
@@ -107,19 +108,6 @@ public class InclusiveMetricsEvaluator {
       return ExpressionVisitors.visitEvaluator(expr, this);
     }
 
-    @Override
-    public <T> Boolean handleNonReference(Bound<T> term) {
-      // If the term in any expression is not a direct reference, assume that 
rows may match. This
-      // happens when
-      // transforms or other expressions are passed to this evaluator. For 
example, bucket16(x) = 0
-      // can't be determined
-      // because this visitor operates on data metrics and not partition 
values. It may be possible
-      // to un-transform
-      // expressions for order preserving transforms in the future, but this 
is not currently
-      // supported.
-      return ROWS_MIGHT_MATCH;
-    }
-
     @Override
     public Boolean alwaysTrue() {
       return ROWS_MIGHT_MATCH; // all rows match
@@ -146,24 +134,27 @@ public class InclusiveMetricsEvaluator {
     }
 
     @Override
-    public <T> Boolean isNull(BoundReference<T> ref) {
+    public <T> Boolean isNull(Bound<T> term) {
       // no need to check whether the field is required because binding 
evaluates that case
       // if the column has no null values, the expression cannot match
-      Integer id = ref.fieldId();
-
-      if (nullCounts != null && nullCounts.containsKey(id) && 
nullCounts.get(id) == 0) {
-        return ROWS_CANNOT_MATCH;
+      if (isNonNullPreserving(term)) {
+        // number of non-nulls is the same as for the ref
+        Integer id = term.ref().fieldId();
+        if (!mayContainNull(id)) {
+          return ROWS_CANNOT_MATCH;
+        }
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean notNull(BoundReference<T> ref) {
+    public <T> Boolean notNull(Bound<T> term) {
       // no need to check whether the field is required because binding 
evaluates that case
       // if the column has no non-null values, the expression cannot match
-      Integer id = ref.fieldId();
 
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
@@ -172,25 +163,33 @@ public class InclusiveMetricsEvaluator {
     }
 
     @Override
-    public <T> Boolean isNaN(BoundReference<T> ref) {
-      Integer id = ref.fieldId();
-
-      if (nanCounts != null && nanCounts.containsKey(id) && nanCounts.get(id) 
== 0) {
-        return ROWS_CANNOT_MATCH;
-      }
-
+    public <T> Boolean isNaN(Bound<T> term) {
       // when there's no nanCounts information, but we already know the column 
only contains null,
       // it's guaranteed that there's no NaN value
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
+      if (!(term instanceof BoundReference)) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      if (nanCounts != null && nanCounts.containsKey(id) && nanCounts.get(id) 
== 0) {
+        return ROWS_CANNOT_MATCH;
+      }
+
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean notNaN(BoundReference<T> ref) {
-      Integer id = ref.fieldId();
+    public <T> Boolean notNaN(Bound<T> term) {
+      if (!(term instanceof BoundReference)) {
+        // identity transforms are already removed by this time
+        return ROWS_MIGHT_MATCH;
+      }
+
+      Integer id = term.ref().fieldId();
 
       if (containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
@@ -200,140 +199,141 @@ public class InclusiveMetricsEvaluator {
     }
 
     @Override
-    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean lt(Bound<T> term, Literal<T> lit) {
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id) || containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
-      if (lowerBounds != null && lowerBounds.containsKey(id)) {
-        T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
-
-        if (NaNUtil.isNaN(lower)) {
-          // NaN indicates unreliable bounds. See the 
InclusiveMetricsEvaluator docs for more.
-          return ROWS_MIGHT_MATCH;
-        }
+      T lower = lowerBound(term);
+      if (null == lower || NaNUtil.isNaN(lower)) {
+        // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator 
docs for more.
+        return ROWS_MIGHT_MATCH;
+      }
 
-        int cmp = lit.comparator().compare(lower, lit.value());
-        if (cmp >= 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      // this also works for transforms that are order preserving:
+      // if a transform f is order preserving, a < b means that f(a) <= f(b).
+      // because lower <= a for all values of a in the file, f(lower) <= f(a).
+      // when f(lower) >= X then f(a) >= f(lower) >= X, so there is no a such 
that f(a) < X
+      // f(lower) >= X means rows cannot match
+      int cmp = lit.comparator().compare(lower, lit.value());
+      if (cmp >= 0) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean ltEq(Bound<T> term, Literal<T> lit) {
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id) || containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
-      if (lowerBounds != null && lowerBounds.containsKey(id)) {
-        T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
-
-        if (NaNUtil.isNaN(lower)) {
-          // NaN indicates unreliable bounds. See the 
InclusiveMetricsEvaluator docs for more.
-          return ROWS_MIGHT_MATCH;
-        }
+      T lower = lowerBound(term);
+      if (null == lower || NaNUtil.isNaN(lower)) {
+        // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator 
docs for more.
+        return ROWS_MIGHT_MATCH;
+      }
 
-        int cmp = lit.comparator().compare(lower, lit.value());
-        if (cmp > 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      // this also works for transforms that are order preserving:
+      // if a transform f is order preserving, a < b means that f(a) <= f(b).
+      // because lower <= a for all values of a in the file, f(lower) <= f(a).
+      // when f(lower) > X then f(a) >= f(lower) > X, so there is no a such 
that f(a) <= X
+      // f(lower) > X means rows cannot match
+      int cmp = lit.comparator().compare(lower, lit.value());
+      if (cmp > 0) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean gt(Bound<T> term, Literal<T> lit) {
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id) || containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
-      if (upperBounds != null && upperBounds.containsKey(id)) {
-        T upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
+      T upper = upperBound(term);
+      if (null == upper) {
+        return ROWS_MIGHT_MATCH;
+      }
 
-        int cmp = lit.comparator().compare(upper, lit.value());
-        if (cmp <= 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      int cmp = lit.comparator().compare(upper, lit.value());
+      if (cmp <= 0) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean gtEq(Bound<T> term, Literal<T> lit) {
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id) || containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
-      if (upperBounds != null && upperBounds.containsKey(id)) {
-        T upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
+      T upper = upperBound(term);
+      if (null == upper) {
+        return ROWS_MIGHT_MATCH;
+      }
 
-        int cmp = lit.comparator().compare(upper, lit.value());
-        if (cmp < 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      int cmp = lit.comparator().compare(upper, lit.value());
+      if (cmp < 0) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean eq(Bound<T> term, Literal<T> lit) {
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id) || containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
-      if (lowerBounds != null && lowerBounds.containsKey(id)) {
-        T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
-
-        if (NaNUtil.isNaN(lower)) {
-          // NaN indicates unreliable bounds. See the 
InclusiveMetricsEvaluator docs for more.
-          return ROWS_MIGHT_MATCH;
-        }
-
+      T lower = lowerBound(term);
+      if (lower != null && !NaNUtil.isNaN(lower)) {
         int cmp = lit.comparator().compare(lower, lit.value());
         if (cmp > 0) {
           return ROWS_CANNOT_MATCH;
         }
       }
 
-      if (upperBounds != null && upperBounds.containsKey(id)) {
-        T upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
+      T upper = upperBound(term);
+      if (null == upper) {
+        return ROWS_MIGHT_MATCH;
+      }
 
-        int cmp = lit.comparator().compare(upper, lit.value());
-        if (cmp < 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      int cmp = lit.comparator().compare(upper, lit.value());
+      if (cmp < 0) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+    public <T> Boolean notEq(Bound<T> term, Literal<T> lit) {
       // because the bounds are not necessarily a min or max value, this 
cannot be answered using
       // them. notEq(col, X) with (X, Y) doesn't guarantee that X is a value 
in col.
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean in(Bound<T> term, Set<T> literalSet) {
+      // all terms are null preserving. see #isNullPreserving(Bound)
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id) || containsNaNsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
@@ -345,125 +345,126 @@ public class InclusiveMetricsEvaluator {
         return ROWS_MIGHT_MATCH;
       }
 
-      if (lowerBounds != null && lowerBounds.containsKey(id)) {
-        T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
+      T lower = lowerBound(term);
+      if (null == lower || NaNUtil.isNaN(lower)) {
+        // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator 
docs for more.
+        return ROWS_MIGHT_MATCH;
+      }
 
-        if (NaNUtil.isNaN(lower)) {
-          // NaN indicates unreliable bounds. See the 
InclusiveMetricsEvaluator docs for more.
-          return ROWS_MIGHT_MATCH;
-        }
+      literals =
+          literals.stream()
+              .filter(v -> ((BoundTerm<T>) term).comparator().compare(lower, 
v) <= 0)
+              .collect(Collectors.toList());
+      // if all values are less than lower bound, rows cannot match
+      if (literals.isEmpty()) {
+        return ROWS_CANNOT_MATCH;
+      }
 
-        literals =
-            literals.stream()
-                .filter(v -> ref.comparator().compare(lower, v) <= 0)
-                .collect(Collectors.toList());
-        if (literals.isEmpty()) { // if all values are less than lower bound, 
rows cannot match.
-          return ROWS_CANNOT_MATCH;
-        }
+      T upper = upperBound(term);
+      if (null == upper) {
+        return ROWS_MIGHT_MATCH;
       }
 
-      if (upperBounds != null && upperBounds.containsKey(id)) {
-        T upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
-        literals =
-            literals.stream()
-                .filter(v -> ref.comparator().compare(upper, v) >= 0)
-                .collect(Collectors.toList());
-        if (literals
-            .isEmpty()) { // if all remaining values are greater than upper 
bound, rows cannot
-          // match.
-          return ROWS_CANNOT_MATCH;
-        }
+      literals =
+          literals.stream()
+              .filter(v -> ((BoundTerm<T>) term).comparator().compare(upper, 
v) >= 0)
+              .collect(Collectors.toList());
+      // if remaining values are greater than upper bound, rows cannot match
+      if (literals.isEmpty()) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+    public <T> Boolean notIn(Bound<T> term, Set<T> literalSet) {
       // because the bounds are not necessarily a min or max value, this 
cannot be answered using
       // them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a 
value in col.
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
+    public <T> Boolean startsWith(Bound<T> term, Literal<T> lit) {
+      if (term instanceof BoundTransform
+          && !((BoundTransform<?, ?>) term).transform().isIdentity()) {
+        // truncate must be rewritten in binding. the result is either always 
or never compatible
+        return ROWS_MIGHT_MATCH;
+      }
 
+      Integer id = term.ref().fieldId();
       if (containsNullsOnly(id)) {
         return ROWS_CANNOT_MATCH;
       }
 
-      ByteBuffer prefixAsBytes = lit.toByteBuffer();
+      String prefix = (String) lit.value();
 
-      Comparator<ByteBuffer> comparator = Comparators.unsignedBytes();
+      Comparator<CharSequence> comparator = Comparators.charSequences();
 
-      if (lowerBounds != null && lowerBounds.containsKey(id)) {
-        ByteBuffer lower = lowerBounds.get(id);
-        // truncate lower bound so that its length in bytes is not greater 
than the length of prefix
-        int length = Math.min(prefixAsBytes.remaining(), lower.remaining());
-        int cmp = comparator.compare(BinaryUtil.truncateBinary(lower, length), 
prefixAsBytes);
-        if (cmp > 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      CharSequence lower = (CharSequence) lowerBound(term);
+      if (null == lower) {
+        return ROWS_MIGHT_MATCH;
       }
 
-      if (upperBounds != null && upperBounds.containsKey(id)) {
-        ByteBuffer upper = upperBounds.get(id);
-        // truncate upper bound so that its length in bytes is not greater 
than the length of prefix
-        int length = Math.min(prefixAsBytes.remaining(), upper.remaining());
-        int cmp = comparator.compare(BinaryUtil.truncateBinary(upper, length), 
prefixAsBytes);
-        if (cmp < 0) {
-          return ROWS_CANNOT_MATCH;
-        }
+      // truncate lower bound so that its length in characters is not greater 
than the length of prefix
+      int length = Math.min(prefix.length(), lower.length());
+      int cmp = comparator.compare(lower.subSequence(0, length), prefix);
+      if (cmp > 0) {
+        return ROWS_CANNOT_MATCH;
+      }
+
+      CharSequence upper = (CharSequence) upperBound(term);
+      if (null == upper) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      // truncate upper bound so that its length in characters is not greater 
than the length of prefix
+      length = Math.min(prefix.length(), upper.length());
+      cmp = comparator.compare(upper.subSequence(0, length), prefix);
+      if (cmp < 0) {
+        return ROWS_CANNOT_MATCH;
       }
 
       return ROWS_MIGHT_MATCH;
     }
 
     @Override
-    public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
-      Integer id = ref.fieldId();
-
+    public <T> Boolean notStartsWith(Bound<T> term, Literal<T> lit) {
+      // the only transforms that produce strings are truncate and identity, 
which work with this
+      Integer id = term.ref().fieldId();
       if (mayContainNull(id)) {
         return ROWS_MIGHT_MATCH;
       }
 
-      ByteBuffer prefixAsBytes = lit.toByteBuffer();
+      String prefix = (String) lit.value();
 
-      Comparator<ByteBuffer> comparator = Comparators.unsignedBytes();
+      Comparator<CharSequence> comparator = Comparators.charSequences();
 
       // notStartsWith will match unless all values must start with the 
prefix. This happens when
-      // the lower and upper
-      // bounds both start with the prefix.
-      if (lowerBounds != null
-          && upperBounds != null
-          && lowerBounds.containsKey(id)
-          && upperBounds.containsKey(id)) {
-        ByteBuffer lower = lowerBounds.get(id);
-        // if lower is shorter than the prefix then lower doesn't start with 
the prefix
-        if (lower.remaining() < prefixAsBytes.remaining()) {
+      // the lower and upper bounds both start with the prefix.
+      CharSequence lower = (CharSequence) lowerBound(term);
+      CharSequence upper = (CharSequence) upperBound(term);
+      if (null == lower || null == upper) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      // if lower is shorter than the prefix then lower doesn't start with the 
prefix
+      if (lower.length() < prefix.length()) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      int cmp = comparator.compare(lower.subSequence(0, prefix.length()), 
prefix);
+      if (cmp == 0) {
+        // if upper is shorter than the prefix then upper can't start with the 
prefix
+        if (upper.length() < prefix.length()) {
           return ROWS_MIGHT_MATCH;
         }
 
-        int cmp =
-            comparator.compare(
-                BinaryUtil.truncateBinary(lower, prefixAsBytes.remaining()), 
prefixAsBytes);
+        cmp = comparator.compare(upper.subSequence(0, prefix.length()), 
prefix);
         if (cmp == 0) {
-          ByteBuffer upper = upperBounds.get(id);
-          // if upper is shorter than the prefix then upper can't start with 
the prefix
-          if (upper.remaining() < prefixAsBytes.remaining()) {
-            return ROWS_MIGHT_MATCH;
-          }
-
-          cmp =
-              comparator.compare(
-                  BinaryUtil.truncateBinary(upper, prefixAsBytes.remaining()), 
prefixAsBytes);
-          if (cmp == 0) {
-            // both bounds match the prefix, so all rows must match the prefix 
and therefore do not
-            // satisfy
-            // the predicate
-            return ROWS_CANNOT_MATCH;
-          }
+          // both bounds match the prefix, so all rows must match the prefix 
and therefore do not
+          // satisfy the predicate
+          return ROWS_CANNOT_MATCH;
         }
       }
 
@@ -471,7 +472,7 @@ public class InclusiveMetricsEvaluator {
     }
 
     private boolean mayContainNull(Integer id) {
-      return nullCounts == null || (nullCounts.containsKey(id) && 
nullCounts.get(id) != 0);
+      return nullCounts == null || !nullCounts.containsKey(id) || 
nullCounts.get(id) != 0;
     }
 
     private boolean containsNullsOnly(Integer id) {
@@ -488,5 +489,123 @@ public class InclusiveMetricsEvaluator {
           && valueCounts != null
           && nanCounts.get(id).equals(valueCounts.get(id));
     }
+
+    private <T> T lowerBound(Bound<T> term) {
+      if (term instanceof BoundReference) {
+        return parseLowerBound((BoundReference<T>) term);
+      } else if (term instanceof BoundTransform) {
+        return transformLowerBound((BoundTransform<?, T>) term);
+      } else if (term instanceof BoundExtract) {
+        return extractLowerBound((BoundExtract<T>) term);
+      } else {
+        return null;
+      }
+    }
+
+    private <T> T upperBound(Bound<T> term) {
+      if (term instanceof BoundReference) {
+        return parseUpperBound((BoundReference<T>) term);
+      } else if (term instanceof BoundTransform) {
+        return transformUpperBound((BoundTransform<?, T>) term);
+      } else if (term instanceof BoundExtract) {
+        return extractUpperBound((BoundExtract<T>) term);
+      } else {
+        return null;
+      }
+    }
+
+    private <T> T parseLowerBound(BoundReference<T> ref) {
+      Integer id = ref.fieldId();
+      if (lowerBounds != null && lowerBounds.containsKey(id)) {
+        return Conversions.fromByteBuffer(ref.ref().type(), 
lowerBounds.get(id));
+      }
+
+      return null;
+    }
+
+    private <T> T parseUpperBound(BoundReference<T> ref) {
+      Integer id = ref.fieldId();
+      if (upperBounds != null && upperBounds.containsKey(id)) {
+        return Conversions.fromByteBuffer(ref.ref().type(), 
upperBounds.get(id));
+      }
+
+      return null;
+    }
+
+    private <S, T> T transformLowerBound(BoundTransform<S, T> boundTransform) {
+      Transform<S, T> transform = boundTransform.transform();
+      if (transform.preservesOrder()) {
+        S lower = parseLowerBound(boundTransform.ref());
+        return 
boundTransform.transform().bind(boundTransform.ref().type()).apply(lower);
+      }
+
+      return null;
+    }
+
+    private <S, T> T transformUpperBound(BoundTransform<S, T> boundTransform) {
+      Transform<S, T> transform = boundTransform.transform();
+      if (transform.preservesOrder()) {
+        S upper = parseUpperBound(boundTransform.ref());
+        return 
boundTransform.transform().bind(boundTransform.ref().type()).apply(upper);
+      }
+
+      return null;
+    }
+
+    private <T> T extractLowerBound(BoundExtract<T> bound) {
+      Integer id = bound.ref().fieldId();
+      if (lowerBounds != null && lowerBounds.containsKey(id)) {
+        VariantObject fieldLowerBounds = parseBounds(lowerBounds.get(id));
+        return VariantExpressionUtil.castTo(
+            fieldLowerBounds.get(bound.fullFieldName()), bound.type());
+      }
+
+      return null;
+    }
+
+    private <T> T extractUpperBound(BoundExtract<T> bound) {
+      Integer id = bound.ref().fieldId();
+      if (upperBounds != null && upperBounds.containsKey(id)) {
+        VariantObject fieldUpperBounds = parseBounds(upperBounds.get(id));
+        return VariantExpressionUtil.castTo(
+            fieldUpperBounds.get(bound.fullFieldName()), bound.type());
+      }
+
+      return null;
+    }
+
+    /** Returns true if the expression term produces a null value for a null 
input. */
+    //    private boolean isNullPreserving(Bound<?> term) {
+    //      if (term instanceof BoundReference) {
+    //        return true;
+    //      } else if (term instanceof BoundTransform<?, ?>) {
+    //        // transforms must map null to null
+    //        return true;
+    //      } else if (term instanceof BoundExtract) {
+    //        // a null variant contains no non-null values
+    //        return true;
+    //      }
+    //
+    //      // unknown cases are not null preserving
+    //      return false;
+    //    }
+
+    /** Returns true if the expression term produces a non-null value for 
non-null input. */
+    private boolean isNonNullPreserving(Bound<?> term) {
+      if (term instanceof BoundReference) {
+        return true;
+      } else if (term instanceof BoundTransform<?, ?>) {
+        // if the transform preserves order, then non-null values are mapped 
to non-null
+        return ((BoundTransform<?, ?>) term).transform().preservesOrder();
+      }
+
+      // a non-null variant does not necessarily contain a specific field
+      // and unknown bound terms are not non-null preserving
+      return false;
+    }
+  }
+
+  private static VariantObject parseBounds(ByteBuffer buffer) {
+    return Variant.from(buffer).value().asObject();
   }
 }
diff --git a/api/src/main/java/org/apache/iceberg/expressions/VariantExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/VariantExpressionUtil.java
new file mode 100644
index 0000000000..dca11d5e46
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/expressions/VariantExpressionUtil.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.expressions;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.variants.PhysicalType;
+import org.apache.iceberg.variants.VariantValue;
+
+class VariantExpressionUtil {
+  // TODO: Implement PhysicalType.TIME
+  // TODO: Implement PhysicalType.TIMESTAMPNTZ_NANO and 
PhysicalType.TIMESTAMPTZ_NANO
+  // TODO: Implement PhysicalType.UUID
+  private static final Map<Type, PhysicalType> NO_CONVERSION_NEEDED =
+      ImmutableMap.<Type, PhysicalType>builder()
+          .put(Types.IntegerType.get(), PhysicalType.INT32)
+          .put(Types.LongType.get(), PhysicalType.INT64)
+          .put(Types.FloatType.get(), PhysicalType.FLOAT)
+          .put(Types.DoubleType.get(), PhysicalType.DOUBLE)
+          .put(Types.DateType.get(), PhysicalType.DATE)
+          .put(Types.TimestampType.withoutZone(), PhysicalType.TIMESTAMPNTZ)
+          .put(Types.TimestampType.withZone(), PhysicalType.TIMESTAMPTZ)
+          .put(Types.StringType.get(), PhysicalType.STRING)
+          .put(Types.BinaryType.get(), PhysicalType.BINARY)
+          .put(Types.UnknownType.get(), PhysicalType.NULL)
+          .build();
+
+  private VariantExpressionUtil() {}
+
+  @SuppressWarnings("unchecked")
+  static <T> T castTo(VariantValue value, Type type) {
+    if (value == null) {
+      return null;
+    } else if (NO_CONVERSION_NEEDED.get(type) == value.type()) {
+      return (T) value.asPrimitive().get();
+    }
+
+    switch (type.typeId()) {
+      case INTEGER:
+        switch (value.type()) {
+          case INT8:
+          case INT16:
+            return (T) (Integer) ((Number) 
value.asPrimitive().get()).intValue();
+        }
+
+        break;
+      case LONG:
+        switch (value.type()) {
+          case INT8:
+          case INT16:
+          case INT32:
+            return (T) (Long) ((Number) value.asPrimitive().get()).longValue();
+        }
+
+        break;
+      case DOUBLE:
+        if (value.type() == PhysicalType.FLOAT) {
+          return (T) (Double) ((Number) 
value.asPrimitive().get()).doubleValue();
+        }
+
+        break;
+      case FIXED:
+        Types.FixedType fixedType = (Types.FixedType) type;
+        if (value.type() == PhysicalType.BINARY) {
+          ByteBuffer buffer = (ByteBuffer) value.asPrimitive().get();
+          if (buffer.remaining() == fixedType.length()) {
+            return (T) buffer;
+          }
+        }
+
+        break;
+      case DECIMAL:
+        Types.DecimalType decimalType = (Types.DecimalType) type;
+        switch (value.type()) {
+          case DECIMAL4:
+          case DECIMAL8:
+          case DECIMAL16:
+            BigDecimal decimalValue = (BigDecimal) value.asPrimitive().get();
+            if (decimalValue.scale() == decimalType.scale()) {
+              return (T) decimalValue;
+            }
+        }
+
+        break;
+      case BOOLEAN:
+        switch (value.type()) {
+          case BOOLEAN_FALSE:
+            return (T) Boolean.FALSE;
+          case BOOLEAN_TRUE:
+            return (T) Boolean.TRUE;
+        }
+
+        break;
+    }
+
+    return null;
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluatorWithExtract.java b/core/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluatorWithExtract.java
new file mode 100644
index 0000000000..76a1296a6c
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluatorWithExtract.java
@@ -0,0 +1,678 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.expressions;
+
+import static org.apache.iceberg.expressions.Expressions.and;
+import static org.apache.iceberg.expressions.Expressions.equal;
+import static org.apache.iceberg.expressions.Expressions.extract;
+import static org.apache.iceberg.expressions.Expressions.greaterThan;
+import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.in;
+import static org.apache.iceberg.expressions.Expressions.isNaN;
+import static org.apache.iceberg.expressions.Expressions.isNull;
+import static org.apache.iceberg.expressions.Expressions.lessThan;
+import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.not;
+import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.notIn;
+import static org.apache.iceberg.expressions.Expressions.notNaN;
+import static org.apache.iceberg.expressions.Expressions.notNull;
+import static org.apache.iceberg.expressions.Expressions.notStartsWith;
+import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers.Row;
+import org.apache.iceberg.TestHelpers.TestDataFile;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.IntegerType;
+import org.apache.iceberg.variants.VariantTestUtil;
+import org.apache.iceberg.variants.Variants;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.FieldSource;
+
+public class TestInclusiveMetricsEvaluatorWithExtract {
+  // Schema shared by every test: a required int "id" and two variant columns. Field 3
+  // ("all_nulls") is the only optional column.
+  private static final Schema SCHEMA =
+      new Schema(
+          required(1, "id", IntegerType.get()),
+          required(2, "variant", Types.VariantType.get()),
+          optional(3, "all_nulls", Types.VariantType.get()));
+
+  // Values stored for "event_id" in the lower and upper variant bounds of FILE. Tests build
+  // their literals as offsets from these two constants.
+  private static final int INT_MIN_VALUE = 30;
+  private static final int INT_MAX_VALUE = 79;
+
+  // Default data file for the tests. Only field 2 ("variant") carries bounds: event_id in
+  // [30, 79] and str in ["abc", "abe"]. Field 3 has as many nulls as values.
+  private static final DataFile FILE =
+      new TestDataFile(
+          "file.avro",
+          Row.of(),
+          // presumably the record count; it matches the value counts below -- confirm
+          // against the TestDataFile constructor
+          50,
+          // any value counts, including nulls
+          ImmutableMap.<Integer, Long>builder().put(1, 50L).put(2, 50L).put(3, 
50L).buildOrThrow(),
+          // null value counts
+          ImmutableMap.<Integer, Long>builder()
+              .put(1, 0L)
+              .put(2, 0L)
+              .put(3, 50L) // all_nulls
+              .buildOrThrow(),
+          // nan value counts
+          null,
+          // lower bounds
+          ImmutableMap.of(
+              2,
+              VariantTestUtil.variantBuffer(
+                  Map.of("event_id", Variants.of(INT_MIN_VALUE), "str", 
Variants.of("abc")))),
+          // upper bounds
+          ImmutableMap.of(
+              2,
+              VariantTestUtil.variantBuffer(
+                  Map.of("event_id", Variants.of(INT_MAX_VALUE), "str", 
Variants.of("abe")))));
+
+  /** Evaluates {@code expr} against the shared {@code FILE} fixture. */
+  private boolean shouldRead(Expression expr) {
+    return shouldRead(expr, FILE);
+  }
+
+  /** Evaluates {@code expr} against {@code file} with case-sensitive binding. */
+  private boolean shouldRead(Expression expr, DataFile file) {
+    return shouldRead(expr, file, true);
+  }
+
+  /** Evaluates {@code expr} against the shared {@code FILE} with case sensitivity off. */
+  private boolean shouldReadCaseInsensitive(Expression expr) {
+    return shouldRead(expr, FILE, false);
+  }
+
+  /**
+   * Builds an {@code InclusiveMetricsEvaluator} over {@code SCHEMA} for {@code expr} and
+   * returns the result of evaluating it on {@code file}. All other helpers delegate here.
+   */
+  private boolean shouldRead(Expression expr, DataFile file, boolean 
caseSensitive) {
+    return new InclusiveMetricsEvaluator(SCHEMA, expr, 
caseSensitive).eval(file);
+  }
+
+  /**
+   * Runs every predicate type over values extracted from "all_nulls", whose null count in
+   * FILE equals its value count. Predicates that need a non-null value are expected to skip
+   * the file; isNull and the negated predicates (notEqual, notIn, notStartsWith, notNaN) are
+   * expected to read it.
+   */
+  @Test
+  public void testAllNulls() {
+    assertThat(shouldRead(isNull(extract("all_nulls", "$.event_id", "long"))))
+        .as("Should read: null values exist in all null column")
+        .isTrue();
+
+    assertThat(shouldRead(notNull(extract("all_nulls", "$.event_id", "long"))))
+        .as("Should skip: no non-null value in all null column")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(extract("all_nulls", "$.event_id", "long"), 
30)))
+        .as("Should skip: lessThan on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(lessThanOrEqual(extract("all_nulls", "$.event_id", 
"long"), 30)))
+        .as("Should skip: lessThanOrEqual on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(extract("all_nulls", "$.event_id", 
"long"), 30)))
+        .as("Should skip: greaterThan on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThanOrEqual(extract("all_nulls", 
"$.event_id", "long"), 30)))
+        .as("Should skip: greaterThanOrEqual on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(equal(extract("all_nulls", "$.event_id", "long"), 
30)))
+        .as("Should skip: equal on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(notEqual(extract("all_nulls", "$.event_id", "long"), 
30)))
+        .as("Should read: notEqual on all null column")
+        .isTrue();
+
+    assertThat(shouldRead(in(extract("all_nulls", "$.event_type", "string"), 
"abc", "def")))
+        .as("Should skip: in on all nulls column")
+        .isFalse();
+
+    assertThat(shouldRead(notIn(extract("all_nulls", "$.event_type", 
"string"), "abc", "def")))
+        .as("Should read: notIn on all nulls column")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(extract("all_nulls", "$.event_type", 
"string"), "a")))
+        .as("Should skip: startsWith on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(notStartsWith(extract("all_nulls", "$.event_type", 
"string"), "a")))
+        .as("Should read: notStartsWith on all null column")
+        .isTrue();
+
+    assertThat(shouldRead(isNaN(extract("all_nulls", "$.measurement", 
"double"))))
+        .as("Should skip: no NaN value in all null column")
+        .isFalse();
+
+    assertThat(shouldRead(notNaN(extract("all_nulls", "$.measurement", 
"double"))))
+        .as("Should read: all null column has non-NaN values")
+        .isTrue();
+  }
+
+  /**
+   * isNaN and notNaN over "$.measurement" in the "variant" column. FILE's bounds hold only
+   * "event_id" and "str" and no NaN counts are supplied, so both predicates are expected to
+   * read the file.
+   */
+  @Test
+  public void testIsNaNAndNotNaN() {
+    assertThat(shouldRead(isNaN(extract("variant", "$.measurement", 
"double"))))
+        .as("Should read: variant may contain NaN")
+        .isTrue();
+
+    assertThat(shouldRead(notNaN(extract("variant", "$.measurement", 
"double"))))
+        .as("Should read: variant may contain non-NaN")
+        .isTrue();
+  }
+
+  /**
+   * Extracting from a column name that is not in SCHEMA must fail with a
+   * {@code ValidationException} naming the missing field.
+   */
+  @Test
+  public void testMissingColumn() {
+    assertThatThrownBy(() -> shouldRead(lessThan(extract("missing", "$", 
"int"), 5)))
+        .isInstanceOf(ValidationException.class)
+        .hasMessageContaining("Cannot find field 'missing'");
+  }
+
+  // Predicates over "$.missing", a path that does not appear in FILE's variant bounds.
+  // Used as the @FieldSource for testMissingStats and testZeroRecordFile.
+  private static final Expression[] MISSING_STATS_EXPRESSIONS =
+      new Expression[] {
+        lessThan(extract("variant", "$.missing", "long"), 5),
+        lessThanOrEqual(extract("variant", "$.missing", "long"), 30),
+        equal(extract("variant", "$.missing", "long"), 70),
+        greaterThan(extract("variant", "$.missing", "long"), 78),
+        greaterThanOrEqual(extract("variant", "$.missing", "long"), 90),
+        notEqual(extract("variant", "$.missing", "long"), 101),
+        isNull(extract("variant", "$.missing", "string")),
+        notNull(extract("variant", "$.missing", "string")),
+        isNaN(extract("variant", "$.missing", "float")),
+        notNaN(extract("variant", "$.missing", "float"))
+      };
+
+  /**
+   * For each expression in MISSING_STATS_EXPRESSIONS, the evaluator is expected to read
+   * FILE: the extracted path has no stats, so the file cannot be ruled out.
+   */
+  @ParameterizedTest
+  @FieldSource("MISSING_STATS_EXPRESSIONS")
+  public void testMissingStats(Expression expr) {
+    assertThat(shouldRead(expr)).as("Should read when missing stats for expr: 
" + expr).isTrue();
+  }
+
+  /**
+   * For each expression in MISSING_STATS_EXPRESSIONS, a file built with a count of 0 and
+   * no metrics is expected to be skipped.
+   */
+  @ParameterizedTest
+  @FieldSource("MISSING_STATS_EXPRESSIONS")
+  public void testZeroRecordFile(Expression expr) {
+    DataFile empty = new TestDataFile("file.parquet", Row.of(), 0);
+    assertThat(shouldRead(expr, empty)).as("Should never read 0-record file: " 
+ expr).isFalse();
+  }
+
+  /**
+   * Negation over event_id (bounds 30..79). not(event_id &lt; 5) is expected to read;
+   * not(event_id &gt; 5) is expected to skip.
+   */
+  @Test
+  public void testNot() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding 
will simplify it out
+    assertThat(
+            shouldRead(not(lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25))))
+        .as("Should read: not(false)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                not(greaterThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25))))
+        .as("Should skip: not(true)")
+        .isFalse();
+  }
+
+  /**
+   * Conjunctions of event_id range predicates (bounds 30..79). The file is expected to be
+   * read only when both operands can match.
+   */
+  @Test
+  public void testAnd() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding 
will simplify it out
+    assertThat(
+            shouldRead(
+                and(
+                    lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(
+                        extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 30))))
+        .as("Should skip: and(false, true)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                and(
+                    lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(
+                        extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 1))))
+        .as("Should skip: and(false, false)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                and(
+                    greaterThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25),
+                    lessThanOrEqual(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE))))
+        .as("Should read: and(true, true)")
+        .isTrue();
+  }
+
+  /**
+   * Disjunctions of event_id range predicates (bounds 30..79). The file is expected to be
+   * skipped only when neither operand can match.
+   */
+  @Test
+  public void testOr() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding 
will simplify it out
+    assertThat(
+            shouldRead(
+                or(
+                    lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(
+                        extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 1))))
+        .as("Should skip: or(false, false)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                or(
+                    lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(
+                        extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE - 19))))
+        .as("Should read: or(false, true)")
+        .isTrue();
+  }
+
+  /**
+   * lessThan over event_id (bounds 30..79). Literals at or below the lower bound (5, 30)
+   * are expected to skip the file; literals above it (31, 79) are expected to read it.
+   */
+  @Test
+  public void testIntegerLt() {
+    assertThat(shouldRead(lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25)))
+        .as("Should not read: id range below lower bound (5 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE)))
+        .as("Should not read: id range below lower bound (30 is not < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE + 1)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(lessThan(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  /**
+   * lessThanOrEqual over event_id (bounds 30..79). Literals below the lower bound (5, 29)
+   * are expected to skip the file; literals at or above it (30, 79) are expected to read it.
+   */
+  @Test
+  public void testIntegerLtEq() {
+    assertThat(
+            shouldRead(
+                lessThanOrEqual(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25)))
+        .as("Should not read: id range below lower bound (5 < 30)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                lessThanOrEqual(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 1)))
+        .as("Should not read: id range below lower bound (29 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThanOrEqual(extract("variant", "$.event_id", 
"long"), INT_MIN_VALUE)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(lessThanOrEqual(extract("variant", "$.event_id", 
"long"), INT_MAX_VALUE)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  /**
+   * greaterThan over event_id (bounds 30..79). Literals at or above the upper bound
+   * (85, 79) are expected to skip the file; literals below it (78, 75) are expected to
+   * read it.
+   */
+  @Test
+  public void testIntegerGt() {
+    assertThat(shouldRead(greaterThan(extract("variant", "$.event_id", 
"long"), INT_MAX_VALUE + 6)))
+        .as("Should not read: id range above upper bound (85 > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(extract("variant", "$.event_id", 
"long"), INT_MAX_VALUE)))
+        .as("Should not read: id range above upper bound (79 is not > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(extract("variant", "$.event_id", 
"long"), INT_MAX_VALUE - 1)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(greaterThan(extract("variant", "$.event_id", 
"long"), INT_MAX_VALUE - 4)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  /**
+   * greaterThanOrEqual over event_id (bounds 30..79). Literals above the upper bound
+   * (85, 80) are expected to skip the file; literals at or below it (79, 75) are expected
+   * to read it.
+   */
+  @Test
+  public void testIntegerGtEq() {
+    assertThat(
+            shouldRead(
+                greaterThanOrEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 6)))
+        .as("Should not read: id range above upper bound (85 > 79)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                greaterThanOrEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 1)))
+        .as("Should not read: id range above upper bound (80 > 79)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(greaterThanOrEqual(extract("variant", "$.event_id", 
"long"), INT_MAX_VALUE)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                greaterThanOrEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE - 4)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  /**
+   * equal over event_id (bounds 30..79). Literals outside the bounds (5, 29, 80, 85) are
+   * expected to skip the file; literals inside or on the bounds (30, 75, 79) are expected
+   * to read it.
+   */
+  @Test
+  public void testIntegerEq() {
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25)))
+        .as("Should not read: id below lower bound")
+        .isFalse();
+
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 1)))
+        .as("Should not read: id below lower bound")
+        .isFalse();
+
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE - 4)))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE)))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 1)))
+        .as("Should not read: id above upper bound")
+        .isFalse();
+
+    assertThat(shouldRead(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 6)))
+        .as("Should not read: id above upper bound")
+        .isFalse();
+  }
+
+  /**
+   * notEqual over event_id (bounds 30..79). Every literal, whether outside, on, or inside
+   * the bounds, is expected to read the file.
+   */
+  @Test
+  public void testIntegerNotEq() {
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25)))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 1)))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE - 4)))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE)))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 1)))
+        .as("Should read: id above upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 6)))
+        .as("Should read: id above upper bound")
+        .isTrue();
+  }
+
+  /**
+   * Same literals as testIntegerNotEq, but written as not(equal(...)). Every case is
+   * expected to read the file.
+   */
+  @Test
+  public void testIntegerNotEqRewritten() {
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 25))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE - 1))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MIN_VALUE))))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE - 4))))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE))))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 1))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(extract("variant", "$.event_id", "long"), 
INT_MAX_VALUE + 6))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+  }
+
+  /**
+   * Repeats the not(equal(...)) cases with the column referenced as "VARIANT" and case
+   * sensitivity turned off. Every case is expected to read the file.
+   */
+  @Test
+  public void testCaseInsensitiveIntegerNotEqRewritten() {
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MIN_VALUE - 25))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MIN_VALUE - 1))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MIN_VALUE))))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MAX_VALUE - 4))))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MAX_VALUE))))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MAX_VALUE + 1))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+
+    assertThat(
+            shouldReadCaseInsensitive(
+                not(equal(extract("VARIANT", "$.event_id", "long"), 
INT_MAX_VALUE + 6))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+  }
+
+  /**
+   * With the default case-sensitive helper, referencing the column as "VARIANT" must fail
+   * with a {@code ValidationException} naming the unknown field.
+   */
+  @Test
+  public void testCaseSensitiveIntegerNotEqRewritten() {
+    assertThatThrownBy(() -> shouldRead(not(equal(extract("VARIANT", 
"$.event_id", "long"), 5))))
+        .isInstanceOf(ValidationException.class)
+        .hasMessageContaining("Cannot find field 'VARIANT'");
+  }
+
+  /**
+   * startsWith over "$.str" (bounds "abc".."abe"). Prefixes "a", "ab", "abcd" and "abd"
+   * are expected to read the file; "abex" is expected to skip it.
+   */
+  @Test
+  public void testStringStartsWith() {
+    assertThat(shouldRead(startsWith(extract("variant", "$.str", "string"), 
"a")))
+        .as("Should read: prefix of lower and upper")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(extract("variant", "$.str", "string"), 
"ab")))
+        .as("Should read: prefix of lower and upper")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(extract("variant", "$.str", "string"), 
"abcd")))
+        .as("Should read: lower is prefix of value")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(extract("variant", "$.str", "string"), 
"abd")))
+        .as("Should read: value is between upper and lower")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(extract("variant", "$.str", "string"), 
"abex")))
+        .as("Should skip: upper is prefix of value")
+        .isFalse();
+  }
+
+  /**
+   * notStartsWith over "$.str" (bounds "abc".."abe"). Prefixes shared by both bounds
+   * ("a", "ab") are expected to skip the file because every value must start with them.
+   * Prefixes "abcd", "abd" and "abex" are expected to read it.
+   */
+  @Test
+  public void testStringNotStartsWith() {
+    assertThat(shouldRead(notStartsWith(extract("variant", "$.str", "string"), 
"a")))
+        .as("Should skip: prefix of lower and upper, all values must match")
+        .isFalse();
+
+    assertThat(shouldRead(notStartsWith(extract("variant", "$.str", "string"), 
"ab")))
+        .as("Should skip: prefix of lower and upper, all values must match")
+        .isFalse();
+
+    assertThat(shouldRead(notStartsWith(extract("variant", "$.str", "string"), 
"abcd")))
+        .as("Should read: lower is prefix of value, some values do not match")
+        .isTrue();
+
+    assertThat(shouldRead(notStartsWith(extract("variant", "$.str", "string"), 
"abd")))
+        .as("Should read: value is between upper and lower, some values do not 
match")
+        .isTrue();
+
+    assertThat(shouldRead(notStartsWith(extract("variant", "$.str", "string"), 
"abex")))
+        .as("Should read: upper is prefix of value, some values do not match")
+        .isTrue();
+  }
+
+  /**
+   * in over event_id (bounds 30..79). The file is expected to be skipped when both
+   * literals fall outside the bounds and read when at least one falls on or inside them.
+   * The last case passes the ids -400..0, all below the lower bound, and still expects a
+   * read; the inline comment attributes this to the size of the IN list.
+   */
+  @Test
+  public void testIntegerIn() {
+    assertThat(
+            shouldRead(
+                in(
+                    extract("variant", "$.event_id", "long"),
+                    INT_MIN_VALUE - 25,
+                    INT_MIN_VALUE - 24)))
+        .as("Should not read: id below lower bound (5 < 30, 6 < 30)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                in(extract("variant", "$.event_id", "long"), INT_MIN_VALUE - 
2, INT_MIN_VALUE - 1)))
+        .as("Should not read: id below lower bound (28 < 30, 29 < 30)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                in(extract("variant", "$.event_id", "long"), INT_MIN_VALUE - 
1, INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound (30 == 30)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                in(extract("variant", "$.event_id", "long"), INT_MAX_VALUE - 
4, INT_MAX_VALUE - 3)))
+        .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 
< 76 < 79)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                in(extract("variant", "$.event_id", "long"), INT_MAX_VALUE, 
INT_MAX_VALUE + 1)))
+        .as("Should read: id equal to upper bound (79 == 79)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                in(extract("variant", "$.event_id", "long"), INT_MAX_VALUE + 
1, INT_MAX_VALUE + 2)))
+        .as("Should not read: id above upper bound (80 > 79, 81 > 79)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                in(extract("variant", "$.event_id", "long"), INT_MAX_VALUE + 
6, INT_MAX_VALUE + 7)))
+        .as("Should not read: id above upper bound (85 > 79, 86 > 79)")
+        .isFalse();
+
+    // should read as the number of elements in the in expression is too big
+    List<Integer> ids = Lists.newArrayListWithExpectedSize(400);
+    for (int id = -400; id <= 0; id++) {
+      ids.add(id);
+    }
+    assertThat(shouldRead(in(extract("variant", "$.event_id", "long"), ids)))
+        .as("Should read: large in expression")
+        .isTrue();
+  }
+
+  /**
+   * notIn over event_id (bounds 30..79). Every pair of literals, whether outside, on, or
+   * inside the bounds, is expected to read the file.
+   */
+  @Test
+  public void testIntegerNotIn() {
+    assertThat(
+            shouldRead(
+                notIn(
+                    extract("variant", "$.event_id", "long"),
+                    INT_MIN_VALUE - 25,
+                    INT_MIN_VALUE - 24)))
+        .as("Should read: id below lower bound (5 < 30, 6 < 30)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                notIn(
+                    extract("variant", "$.event_id", "long"),
+                    INT_MIN_VALUE - 2,
+                    INT_MIN_VALUE - 1)))
+        .as("Should read: id below lower bound (28 < 30, 29 < 30)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                notIn(extract("variant", "$.event_id", "long"), INT_MIN_VALUE 
- 1, INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound (30 == 30)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                notIn(
+                    extract("variant", "$.event_id", "long"),
+                    INT_MAX_VALUE - 4,
+                    INT_MAX_VALUE - 3)))
+        .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 
< 76 < 79)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                notIn(extract("variant", "$.event_id", "long"), INT_MAX_VALUE, 
INT_MAX_VALUE + 1)))
+        .as("Should read: id equal to upper bound (79 == 79)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                notIn(
+                    extract("variant", "$.event_id", "long"),
+                    INT_MAX_VALUE + 1,
+                    INT_MAX_VALUE + 2)))
+        .as("Should read: id above upper bound (80 > 79, 81 > 79)")
+        .isTrue();
+
+    assertThat(
+            shouldRead(
+                notIn(
+                    extract("variant", "$.event_id", "long"),
+                    INT_MAX_VALUE + 6,
+                    INT_MAX_VALUE + 7)))
+        .as("Should read: id above upper bound (85 > 79, 86 > 79)")
+        .isTrue();
+  }
+}
diff --git 
a/core/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluatorWithTransforms.java
 
b/core/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluatorWithTransforms.java
new file mode 100644
index 0000000000..5632830064
--- /dev/null
+++ 
b/core/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluatorWithTransforms.java
@@ -0,0 +1,661 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.expressions;
+
+import static org.apache.iceberg.expressions.Expressions.and;
+import static org.apache.iceberg.expressions.Expressions.bucket;
+import static org.apache.iceberg.expressions.Expressions.day;
+import static org.apache.iceberg.expressions.Expressions.equal;
+import static org.apache.iceberg.expressions.Expressions.greaterThan;
+import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.in;
+import static org.apache.iceberg.expressions.Expressions.isNull;
+import static org.apache.iceberg.expressions.Expressions.lessThan;
+import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
+import static org.apache.iceberg.expressions.Expressions.not;
+import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.notIn;
+import static org.apache.iceberg.expressions.Expressions.notNull;
+import static org.apache.iceberg.expressions.Expressions.notStartsWith;
+import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.expressions.Expressions.truncate;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.time.ZoneOffset;
+import java.util.List;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers.Row;
+import org.apache.iceberg.TestHelpers.TestDataFile;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.IntegerType;
+import org.apache.iceberg.util.DateTimeUtil;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.FieldSource;
+
+public class TestInclusiveMetricsEvaluatorWithTransforms {
+  private static final Schema SCHEMA =
+      new Schema(
+          required(1, "id", IntegerType.get()),
+          required(2, "ts", Types.TimestampType.withZone()),
+          optional(3, "all_nulls", Types.IntegerType.get()),
+          optional(4, "all_nulls_str", Types.StringType.get()),
+          optional(5, "no_stats", IntegerType.get()),
+          optional(6, "str", Types.StringType.get()));
+
+  private static final int INT_MIN_VALUE = 30;
+  private static final int INT_MAX_VALUE = 79;
+
+  private static final long TS_MIN_VALUE =
+      DateTimeUtil.microsFromTimestamptz(
+          
DateTimeUtil.dateFromDays(30).atStartOfDay().atOffset(ZoneOffset.UTC));
+  private static final long TS_MAX_VALUE =
+      DateTimeUtil.microsFromTimestamptz(
+          
DateTimeUtil.dateFromDays(79).atStartOfDay().atOffset(ZoneOffset.UTC));
+
+  private static final DataFile FILE =
+      new TestDataFile(
+          "file.avro",
+          Row.of(),
+          50,
+          // any value counts, including nulls
+          ImmutableMap.<Integer, Long>builder()
+              .put(1, 50L)
+              .put(2, 50L)
+              .put(3, 50L)
+              .put(4, 50L)
+              .buildOrThrow(),
+          // null value counts
+          ImmutableMap.<Integer, Long>builder()
+              .put(1, 0L)
+              .put(2, 0L)
+              .put(3, 50L) // all_nulls
+              .put(4, 50L) // all_nulls_str
+              .buildOrThrow(),
+          // nan value counts
+          null,
+          // lower bounds
+          ImmutableMap.of(
+              2, Conversions.toByteBuffer(Types.TimestampType.withZone(), 
TS_MIN_VALUE),
+              6, Conversions.toByteBuffer(Types.StringType.get(), "abc")),
+          // upper bounds
+          ImmutableMap.of(
+              2, Conversions.toByteBuffer(Types.TimestampType.withZone(), 
TS_MAX_VALUE),
+              6, Conversions.toByteBuffer(Types.StringType.get(), "abe")));
+
+  private boolean shouldRead(Expression expr) {
+    return shouldRead(expr, FILE);
+  }
+
+  private boolean shouldRead(Expression expr, DataFile file) {
+    return shouldRead(expr, file, true);
+  }
+
+  private boolean shouldReadCaseInsensitive(Expression expr) {
+    return shouldRead(expr, FILE, false);
+  }
+
+  private boolean shouldRead(Expression expr, DataFile file, boolean 
caseSensitive) {
+    return new InclusiveMetricsEvaluator(SCHEMA, expr, 
caseSensitive).eval(file);
+  }
+
+  @Test
+  public void testAllNullsWithNonOrderPreserving() {
+    // all_nulls (field 3) has 50 values and 50 nulls in FILE and no lower/upper bounds
+    assertThat(shouldRead(isNull(bucket("all_nulls", 100))))
+        .as("Should read: null values exist in all null column")
+        .isTrue();
+
+    assertThat(shouldRead(notNull(bucket("all_nulls", 100))))
+        .as("Should skip: no non-null value in all null column")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(bucket("all_nulls", 100), 30)))
+        .as("Should skip: lessThan on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(lessThanOrEqual(bucket("all_nulls", 100), 30)))
+        .as("Should skip: lessThanOrEqual on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(bucket("all_nulls", 100), 30)))
+        .as("Should skip: greaterThan on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThanOrEqual(bucket("all_nulls", 100), 30)))
+        .as("Should skip: greaterThanOrEqual on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(equal(bucket("all_nulls", 100), 30)))
+        .as("Should skip: equal on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(notEqual(bucket("all_nulls", 100), 30)))
+        .as("Should read: notEqual on all null column")
+        .isTrue();
+
+    // the description must say "skip": the assertion expects the file to be pruned
+    assertThat(shouldRead(in(bucket("all_nulls", 100), 1, 2)))
+        .as("Should skip: in on all nulls column")
+        .isFalse();
+
+    assertThat(shouldRead(notIn(bucket("all_nulls", 100), 1, 2)))
+        .as("Should read: notIn on all nulls column")
+        .isTrue();
+  }
+
+  @Test
+  public void testRequiredWithNonOrderPreserving() {
+    assertThat(shouldRead(isNull(bucket("ts", 100))))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(notNull(bucket("ts", 100))))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(lessThan(bucket("ts", 100), 30)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(lessThanOrEqual(bucket("ts", 100), 30)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(greaterThan(bucket("ts", 100), 30)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(greaterThanOrEqual(bucket("ts", 100), 30)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(equal(bucket("ts", 100), 30)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(bucket("ts", 100), 30)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(in(bucket("ts", 100), 1, 2)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(bucket("ts", 100), 1, 2)))
+        .as("Should read: non-order-preserving transform")
+        .isTrue();
+  }
+
+  @Test
+  public void testAllNulls() {
+    // all_nulls (field 3) and all_nulls_str (field 4) each have 50 values and 50 nulls in FILE
+    assertThat(shouldRead(isNull(truncate("all_nulls", 10))))
+        .as("Should read: null values exist in all null column")
+        .isTrue();
+
+    assertThat(shouldRead(notNull(truncate("all_nulls", 10))))
+        .as("Should skip: no non-null value in all null column")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(truncate("all_nulls", 10), 30)))
+        .as("Should skip: lessThan on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(lessThanOrEqual(truncate("all_nulls", 10), 30)))
+        .as("Should skip: lessThanOrEqual on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(truncate("all_nulls", 10), 30)))
+        .as("Should skip: greaterThan on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThanOrEqual(truncate("all_nulls", 10), 30)))
+        .as("Should skip: greaterThanOrEqual on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(equal(truncate("all_nulls", 10), 30)))
+        .as("Should skip: equal on all null column")
+        .isFalse();
+
+    assertThat(shouldRead(notEqual(truncate("all_nulls", 10), 30)))
+        .as("Should read: notEqual on all null column")
+        .isTrue();
+
+    assertThat(shouldRead(in(truncate("all_nulls", 10), 10, 20)))
+        .as("Should skip: in on all nulls column")
+        .isFalse();
+
+    assertThat(shouldRead(notIn(truncate("all_nulls", 10), 10, 20)))
+        .as("Should read: notIn on all nulls column")
+        .isTrue();
+
+    // the expected result is "read", so the description must not say "skip"
+    assertThat(shouldRead(startsWith(truncate("all_nulls_str", 10), "a")))
+        .as("Should read: startsWith on truncate is not rewritten")
+        .isTrue(); // depends on rewriting the expression, not evaluation
+
+    assertThat(shouldRead(notStartsWith(truncate("all_nulls_str", 10), "a")))
+        .as("Should read: notStartsWith on all null column")
+        .isTrue();
+  }
+
+  @Test
+  public void testMissingColumn() {
+    assertThatThrownBy(() -> shouldRead(lessThan(truncate("missing", 10), 20)))
+        .isInstanceOf(ValidationException.class)
+        .hasMessageContaining("Cannot find field 'missing'");
+  }
+
+  private static final Expression[] MISSING_STATS_EXPRESSIONS =
+      new Expression[] {
+        lessThan(truncate("no_stats", 10), 5),
+        lessThanOrEqual(truncate("no_stats", 10), 30),
+        equal(truncate("no_stats", 10), 70),
+        greaterThan(truncate("no_stats", 10), 78),
+        greaterThanOrEqual(truncate("no_stats", 10), 90),
+        notEqual(truncate("no_stats", 10), 101),
+        isNull(truncate("no_stats", 10)),
+        notNull(truncate("no_stats", 10))
+      };
+
+  @ParameterizedTest
+  @FieldSource("MISSING_STATS_EXPRESSIONS")
+  public void testMissingStats(Expression expr) {
+    assertThat(shouldRead(expr)).as("Should read when missing stats for expr: 
" + expr).isTrue();
+  }
+
+  @ParameterizedTest
+  @FieldSource("MISSING_STATS_EXPRESSIONS")
+  public void testZeroRecordFile(Expression expr) {
+    DataFile empty = new TestDataFile("file.parquet", Row.of(), 0);
+    assertThat(shouldRead(expr, empty)).as("Should never read 0-record file: " 
+ expr).isFalse();
+  }
+
+  @Test
+  public void testNot() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding 
will simplify it out
+    assertThat(shouldRead(not(lessThan(day("ts"), INT_MIN_VALUE - 25))))
+        .as("Should read: not(false)")
+        .isTrue();
+
+    assertThat(shouldRead(not(greaterThan(day("ts"), INT_MIN_VALUE - 25))))
+        .as("Should skip: not(true)")
+        .isFalse();
+  }
+
+  @Test
+  public void testAnd() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding 
will simplify it out
+    assertThat(
+            shouldRead(
+                and(
+                    lessThan(day("ts"), INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(day("ts"), INT_MIN_VALUE - 30))))
+        .as("Should skip: and(false, true)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                and(
+                    lessThan(day("ts"), INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(day("ts"), INT_MAX_VALUE + 1))))
+        .as("Should skip: and(false, false)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                and(
+                    greaterThan(day("ts"), INT_MIN_VALUE - 25),
+                    lessThanOrEqual(day("ts"), INT_MIN_VALUE))))
+        .as("Should read: and(true, true)")
+        .isTrue();
+  }
+
+  @Test
+  public void testOr() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding 
will simplify it out
+    assertThat(
+            shouldRead(
+                or(
+                    lessThan(day("ts"), INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(day("ts"), INT_MAX_VALUE + 1))))
+        .as("Should skip: or(false, false)")
+        .isFalse();
+
+    assertThat(
+            shouldRead(
+                or(
+                    lessThan(day("ts"), INT_MIN_VALUE - 25),
+                    greaterThanOrEqual(day("ts"), INT_MAX_VALUE - 19))))
+        .as("Should read: or(false, true)")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerLt() {
+    // the ts bounds in FILE are midnight UTC of days 30 and 79 (TS_MIN_VALUE / TS_MAX_VALUE)
+    assertThat(shouldRead(lessThan(day("ts"), INT_MIN_VALUE - 25)))
+        .as("Should not read: id range below lower bound (5 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(day("ts"), INT_MIN_VALUE)))
+        .as("Should not read: id range below lower bound (30 is not < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThan(day("ts"), INT_MIN_VALUE + 1)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(lessThan(day("ts"), INT_MAX_VALUE)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerLtEq() {
+    assertThat(shouldRead(lessThanOrEqual(day("ts"), INT_MIN_VALUE - 25)))
+        .as("Should not read: id range below lower bound (5 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThanOrEqual(day("ts"), INT_MIN_VALUE - 1)))
+        .as("Should not read: id range below lower bound (29 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(lessThanOrEqual(day("ts"), INT_MIN_VALUE)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(lessThanOrEqual(day("ts"), INT_MAX_VALUE)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerGt() {
+    // the ts bounds in FILE are midnight UTC of days 30 and 79 (TS_MIN_VALUE / TS_MAX_VALUE)
+    assertThat(shouldRead(greaterThan(day("ts"), INT_MAX_VALUE + 6)))
+        .as("Should not read: id range above upper bound (85 > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(day("ts"), INT_MAX_VALUE)))
+        .as("Should not read: id range above upper bound (79 is not > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThan(day("ts"), INT_MAX_VALUE - 1)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(greaterThan(day("ts"), INT_MAX_VALUE - 4)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerGtEq() {
+    // the ts bounds in FILE are midnight UTC of days 30 and 79 (TS_MIN_VALUE / TS_MAX_VALUE)
+    assertThat(shouldRead(greaterThanOrEqual(day("ts"), INT_MAX_VALUE + 6)))
+        .as("Should not read: id range above upper bound (85 > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThanOrEqual(day("ts"), INT_MAX_VALUE + 1)))
+        .as("Should not read: id range above upper bound (80 > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(greaterThanOrEqual(day("ts"), INT_MAX_VALUE)))
+        .as("Should read: one possible id")
+        .isTrue();
+
+    assertThat(shouldRead(greaterThanOrEqual(day("ts"), INT_MAX_VALUE - 4)))
+        .as("Should read: many possible ids")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerEq() {
+    assertThat(shouldRead(equal(day("ts"), INT_MIN_VALUE - 25)))
+        .as("Should not read: id below lower bound")
+        .isFalse();
+
+    assertThat(shouldRead(equal(day("ts"), INT_MIN_VALUE - 1)))
+        .as("Should not read: id below lower bound")
+        .isFalse();
+
+    assertThat(shouldRead(equal(day("ts"), INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(equal(day("ts"), INT_MAX_VALUE - 4)))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldRead(equal(day("ts"), INT_MAX_VALUE)))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(equal(day("ts"), INT_MAX_VALUE + 1)))
+        .as("Should not read: id above upper bound")
+        .isFalse();
+
+    assertThat(shouldRead(equal(day("ts"), INT_MAX_VALUE + 6)))
+        .as("Should not read: id above upper bound")
+        .isFalse();
+  }
+
+  @Test
+  public void testIntegerNotEq() {
+    assertThat(shouldRead(notEqual(day("ts"), INT_MIN_VALUE - 25)))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(day("ts"), INT_MIN_VALUE - 1)))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(day("ts"), INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(day("ts"), INT_MAX_VALUE - 4)))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(day("ts"), INT_MAX_VALUE)))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(day("ts"), INT_MAX_VALUE + 1)))
+        .as("Should read: id above upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(notEqual(day("ts"), INT_MAX_VALUE + 6)))
+        .as("Should read: id above upper bound")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerNotEqRewritten() {
+    assertThat(shouldRead(not(equal(day("ts"), INT_MIN_VALUE - 25))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(day("ts"), INT_MIN_VALUE - 1))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(day("ts"), INT_MIN_VALUE))))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(day("ts"), INT_MAX_VALUE - 4))))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(day("ts"), INT_MAX_VALUE))))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(day("ts"), INT_MAX_VALUE + 1))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+
+    assertThat(shouldRead(not(equal(day("ts"), INT_MAX_VALUE + 6))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+  }
+
+  @Test
+  public void testCaseInsensitiveIntegerNotEqRewritten() {
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MIN_VALUE - 
25))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MIN_VALUE - 
1))))
+        .as("Should read: id below lower bound")
+        .isTrue();
+
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MIN_VALUE))))
+        .as("Should read: id equal to lower bound")
+        .isTrue();
+
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MAX_VALUE - 
4))))
+        .as("Should read: id between lower and upper bounds")
+        .isTrue();
+
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MAX_VALUE))))
+        .as("Should read: id equal to upper bound")
+        .isTrue();
+
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MAX_VALUE + 
1))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+
+    assertThat(shouldReadCaseInsensitive(not(equal(day("TS"), INT_MAX_VALUE + 
6))))
+        .as("Should read: id above upper bound")
+        .isTrue();
+  }
+
+  @Test
+  public void testCaseSensitiveIntegerNotEqRewritten() {
+    assertThatThrownBy(() -> shouldRead(not(equal(day("TS"), 5))))
+        .isInstanceOf(ValidationException.class)
+        .hasMessageContaining("Cannot find field 'TS'");
+  }
+
+  @Test
+  public void testStringStartsWith() {
+    assertThat(shouldRead(startsWith(truncate("str", 10), "a")))
+        .as("Should read: not rewritten")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(truncate("str", 10), "ab")))
+        .as("Should read: not rewritten")
+        .isTrue();
+
+    assertThat(shouldRead(startsWith(truncate("str", 10), "b")))
+        .as("Should read: not rewritten")
+        .isTrue();
+  }
+
+  @Test
+  public void testStringNotStartsWith() {
+    // exercises notStartsWith on a truncated string term; str (field 6) has bounds
+    // "abc".."abe" in FILE but no recorded null count
+    assertThat(shouldRead(notStartsWith(truncate("str", 10), "a")))
+        .as("Should read: not rewritten")
+        .isTrue();
+
+    assertThat(shouldRead(notStartsWith(truncate("str", 10), "ab")))
+        .as("Should read: not rewritten")
+        .isTrue();
+
+    assertThat(shouldRead(notStartsWith(truncate("str", 10), "b")))
+        .as("Should read: not rewritten")
+        .isTrue();
+  }
+
+  @Test
+  public void testIntegerIn() {
+    // the ts bounds in FILE are midnight UTC of days 30 and 79 (TS_MIN_VALUE / TS_MAX_VALUE)
+    assertThat(shouldRead(in(day("ts"), INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)))
+        .as("Should not read: id below lower bound (5 < 30, 6 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(in(day("ts"), INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)))
+        .as("Should not read: id below lower bound (28 < 30, 29 < 30)")
+        .isFalse();
+
+    assertThat(shouldRead(in(day("ts"), INT_MIN_VALUE - 1, INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound (30 == 30)")
+        .isTrue();
+
+    assertThat(shouldRead(in(day("ts"), INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)))
+        .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)")
+        .isTrue();
+
+    assertThat(shouldRead(in(day("ts"), INT_MAX_VALUE, INT_MAX_VALUE + 1)))
+        .as("Should read: id equal to upper bound (79 == 79)")
+        .isTrue();
+
+    assertThat(shouldRead(in(day("ts"), INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)))
+        .as("Should not read: id above upper bound (80 > 79, 81 > 79)")
+        .isFalse();
+
+    assertThat(shouldRead(in(day("ts"), INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)))
+        .as("Should not read: id above upper bound (85 > 79, 86 > 79)")
+        .isFalse();
+
+    // should read as the number of elements in the in expression is too big
+    // the loop below adds -400..0 inclusive, i.e. 401 values, all below the lower bound
+    List<Integer> ids = Lists.newArrayListWithExpectedSize(401);
+    for (int id = -400; id <= 0; id++) {
+      ids.add(id);
+    }
+    assertThat(shouldRead(in(day("ts"), ids))).as("Should read: large in expression").isTrue();
+  }
+
+  @Test
+  public void testIntegerNotIn() {
+    assertThat(shouldRead(notIn(day("ts"), INT_MIN_VALUE - 25, INT_MIN_VALUE - 
24)))
+        .as("Should read: id below lower bound (5 < 30, 6 < 30)")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(day("ts"), INT_MIN_VALUE - 2, INT_MIN_VALUE - 
1)))
+        .as("Should read: id below lower bound (28 < 30, 29 < 30)")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(day("ts"), INT_MIN_VALUE - 1, INT_MIN_VALUE)))
+        .as("Should read: id equal to lower bound (30 == 30)")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(day("ts"), INT_MAX_VALUE - 4, INT_MAX_VALUE - 
3)))
+        .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 
< 76 < 79)")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(day("ts"), INT_MAX_VALUE, INT_MAX_VALUE + 1)))
+        .as("Should read: id equal to upper bound (79 == 79)")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(day("ts"), INT_MAX_VALUE + 1, INT_MAX_VALUE + 
2)))
+        .as("Should read: id above upper bound (80 > 79, 81 > 79)")
+        .isTrue();
+
+    assertThat(shouldRead(notIn(day("ts"), INT_MAX_VALUE + 6, INT_MAX_VALUE + 
7)))
+        .as("Should read: id above upper bound (85 > 79, 86 > 79)")
+        .isTrue();
+  }
+}
diff --git 
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
 
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
index a4f10d340e..d242661388 100644
--- 
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
+++ 
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
@@ -407,7 +407,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT Equal
     builder = scanBuilder();
@@ -416,6 +416,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
+    // notEq can't be answered using column bounds because they are not exact
     assertThat(scan.planInputPartitions().length).isEqualTo(10);
   }
 
@@ -464,7 +465,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT GT
     builder = scanBuilder();
@@ -473,7 +474,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
   }
 
   @Test
@@ -521,7 +522,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT LT
     builder = scanBuilder();
@@ -530,7 +531,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
   }
 
   @Test
@@ -577,7 +578,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(8);
 
     // NOT GTEQ
     builder = scanBuilder();
@@ -586,7 +587,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(2);
   }
 
   @Test
@@ -734,7 +735,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
   }
 
   @Test
@@ -773,7 +774,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(0);
 
     // NOT IsNull
     builder = scanBuilder();
@@ -830,7 +831,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(0);
   }
 
   @Test
@@ -875,7 +876,7 @@ public class TestSparkScan extends SparkTestBaseWithCatalog 
{
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT (years(ts) = 47 AND bucket(id, 5) >= 2)
     builder = scanBuilder();
diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
index 334725ec8c..3450050e4f 100644
--- 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
+++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
@@ -425,7 +425,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT Equal
     builder = scanBuilder();
@@ -434,6 +434,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
+    // notEq can't be answered using column bounds because they are not exact
     assertThat(scan.planInputPartitions().length).isEqualTo(10);
   }
 
@@ -482,7 +483,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT GT
     builder = scanBuilder();
@@ -491,7 +492,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
   }
 
   @TestTemplate
@@ -539,7 +540,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT LT
     builder = scanBuilder();
@@ -548,7 +549,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
   }
 
   @TestTemplate
@@ -595,7 +596,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(8);
 
     // NOT GTEQ
     builder = scanBuilder();
@@ -604,7 +605,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(2);
   }
 
   @TestTemplate
@@ -752,7 +753,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
   }
 
   @TestTemplate
@@ -791,7 +792,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(0);
 
     // NOT IsNull
     builder = scanBuilder();
@@ -848,7 +849,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(0);
   }
 
   @TestTemplate
@@ -893,7 +894,7 @@ public class TestSparkScan extends TestBaseWithCatalog {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions().length).isEqualTo(10);
+    assertThat(scan.planInputPartitions().length).isEqualTo(5);
 
     // NOT (years(ts) = 47 AND bucket(id, 5) >= 2)
     builder = scanBuilder();

(iceberg) branch main updated: API, Core: Update inclusive metrics evaluator for extract and transforms (#12311)

Reply via email to