stevenzwu commented on code in PR #14500:
URL: https://github.com/apache/iceberg/pull/14500#discussion_r2796072864


##########
api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java:
##########
@@ -1350,4 +1352,122 @@ private void assertEquals(UnboundTransform<?, ?> 
expected, UnboundTransform<?, ?
         .hasToString(expected.transform().toString());
     assertEquals(expected.ref(), actual.ref());
   }
+
+  // Tests for UUID bounds predicate detection and transformation
+
+  @Test
+  public void testToSignedUUIDLiteralNoTransformForNonUuid() {
+    Expression original = Expressions.equal("id", 42L);
+    Expression result = ExpressionUtil.toSignedUUIDLiteral(original);
+
+    assertThat(result).as("Should return null for non-UUID 
predicate").isNull();
+  }
+
+  @Test
+  public void testToSignedUUIDLiteralNoTransformForIsNull() {
+    Expression original = Expressions.isNull("uuid_col");
+    Expression result = ExpressionUtil.toSignedUUIDLiteral(original);
+
+    assertThat(result)
+        .as("Should return null for UUID isNull (doesn't compare against 
bounds)")
+        .isNull();
+  }
+
+  @Test
+  public void testToSignedUUIDLiteralTransformsEqPredicate() {
+    UUID testUuid = UUID.randomUUID();
+    Expression original = Expressions.equal("uuid_col", testUuid);
+
+    Expression result = ExpressionUtil.toSignedUUIDLiteral(original);
+    assertThat(result).isNotNull().isInstanceOf(UnboundPredicate.class);
+    UnboundPredicate<?> predicate = (UnboundPredicate<?>) result;
+    assertThat(predicate.literal().value()).isEqualTo(testUuid);
+    // The literal should now use the signed comparator

Review Comment:
   do we need to add an assertion after line 1379 that the literal's 
`useSignedComparator` var is `false`?
   
   This comment applies to many other test methods.



##########
api/src/test/java/org/apache/iceberg/types/TestComparators.java:
##########
@@ -100,6 +100,35 @@ public void testUuid() {
         Comparators.forType(Types.UUIDType.get()),
         UUID.fromString("81873e7d-1374-4493-8e1d-9095eff7046c"),
         UUID.fromString("fd02441d-1423-4a3f-8785-c7dd5647e26b"));
+    assertComparesCorrectly(
+        Comparators.forType(Types.UUIDType.get()),
+        UUID.fromString("00000000-0000-0000-0000-000000000000"),
+        UUID.fromString("60000000-0000-0000-0000-000000000000"));
+    assertComparesCorrectly(
+        Comparators.forType(Types.UUIDType.get()),
+        UUID.fromString("60000000-0000-0000-0000-000000000000"),
+        UUID.fromString("70000000-0000-0000-0000-000000000000"));
+    // The following assertion fails prior to the introduction of 
UUIDComparator.

Review Comment:
   nit: prior to the introduction of `unsigned` UUIDComparator.



##########
api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java:
##########
@@ -853,4 +869,335 @@ public void testNotInWithSingleValue() {
         .as("Should not read: manifest contains single float value with no 
NaNs")
         .isFalse();
   }
+
+  @Test
+  public void testUuidEq() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(equal("uuid", belowMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuid below lower 
bound").isFalse();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(equal("uuid", UUID_MIN_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to lower 
bound").isTrue();
+
+    UUID between = UUID.fromString("7fffffff-ffff-ffff-7fff-ffffffffffff");
+    shouldRead = ManifestEvaluator.forRowFilter(equal("uuid", between), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(equal("uuid", UUID_MAX_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to upper 
bound").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = ManifestEvaluator.forRowFilter(equal("uuid", aboveMax), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuid above upper 
bound").isFalse();
+  }
+
+  @Test
+  public void testUuidLt() {
+    // With RFC comparison, belowMin is below the lower bound so no rows can 
be < belowMin.
+    // With signed comparison, UUID_MIN_VALUE.compareTo(belowMin) = -1 (lower 
< lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", belowMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+
+    // UUID_MIN_VALUE is the lower bound, so no rows can be < UUID_MIN_VALUE.
+    // Both RFC and signed comparators agree on this since we're comparing the 
value to itself.
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", UUID_MIN_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: uuid range below lower bound (UUID_MIN is not < 
UUID_MIN)")
+        .isFalse();
+
+    UUID justAboveMin = 
UUID.fromString("00000000-0000-0001-0000-000000000000");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", justAboveMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", UUID_MAX_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = ManifestEvaluator.forRowFilter(lessThan("uuid", aboveMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+  }
+
+  @Test
+  public void testUuidLtEq() {
+    // With RFC comparison, belowMin is below the lower bound so no rows can 
be <= belowMin.
+    // With signed comparison, the bounds are inverted, so rows might match.
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", belowMin), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", 
UUID_MIN_VALUE), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", 
UUID_MAX_VALUE), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", aboveMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+  }
+
+  @Test
+  public void testUuidGt() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", belowMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", UUID_MIN_VALUE), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    UUID justBelowMax = 
UUID.fromString("ffffffff-ffff-fffe-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", justBelowMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    // UUID_MAX_VALUE is the upper bound, so no rows can be > UUID_MAX_VALUE.
+    // Both RFC and signed comparators agree on this since we're comparing the 
value to itself.
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", UUID_MAX_VALUE), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: uuid range above upper bound (UUID_MAX is not > 
UUID_MAX)")
+        .isFalse();
+
+    // With RFC comparison, aboveMax is above the upper bound so no rows can 
be > aboveMax.
+    // With signed comparison, UUID_MAX_VALUE.compareTo(aboveMax) = 1 (upper > 
lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", aboveMax), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+  }
+
+  @Test
+  public void testUuidGtEq() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThanOrEqual("uuid", belowMin), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThanOrEqual("uuid", 
UUID_MIN_VALUE), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThanOrEqual("uuid", 
UUID_MAX_VALUE), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    // With RFC comparison, aboveMax is above the upper bound so no rows can 
be >= aboveMax.
+    // With signed comparison, UUID_MAX_VALUE.compareTo(aboveMax) = 1 (upper > 
lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThanOrEqual("uuid", aboveMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+  }
+
+  @Test
+  public void testUuidIn() {
+    UUID belowMin1 = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    UUID belowMin2 = UUID.fromString("00000000-0000-0000-0000-000000000001");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(in("uuid", belowMin1, belowMin2), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuids below lower 
bound").isFalse();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(in("uuid", belowMin1, UUID_MIN_VALUE), 
SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to lower 
bound").isTrue();
+
+    UUID middle1 = UUID.fromString("7fffffff-ffff-ffff-0000-000000000000");
+    UUID middle2 = UUID.fromString("7fffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(in("uuid", middle1, middle2), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuids between lower and upper 
bounds").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(in("uuid", UUID_MAX_VALUE, aboveMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to upper 
bound").isTrue();
+
+    UUID aboveMax2 = UUID.fromString("ffffffff-ffff-ffff-ffff-fffffffffffe");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(in("uuid", aboveMax, aboveMax2), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuids above upper 
bound").isFalse();
+  }
+
+  // Tests for legacy UUID file compatibility (files written with signed UUID 
comparator)
+  // These tests simulate manifests where min/max bounds were computed using 
Java's signed
+  // comparison.
+  //
+  // Key insight: Java's UUID.compareTo() compares MSB first, then LSB, both 
as SIGNED longs.
+  // This means:
+  //   - UUIDs starting with 0x00-0x7F are "positive" (MSB is positive as 
signed long)
+  //   - UUIDs starting with 0x80-0xFF are "negative" (MSB is negative as 
signed long)
+  //
+  // Example file containing UUIDs: 0x00..., 0x40..., 0x80...
+  //   - Unsigned (RFC) order: 0x00... < 0x40... < 0x80...
+  //   - Signed (Java) order:  0x80... < 0x00... < 0x40...
+  //
+  // If written with signed comparator, the manifest would have:
+  //   - min = 0x80... (smallest in signed order)
+  //   - max = 0x40... (largest in signed order)
+
+  // Legacy manifest with "inverted" bounds (min > max in unsigned order)
+  private static final UUID LEGACY_UUID_MIN =

Review Comment:
   nit: we can probably move all the static constructs near the top of the 
class. I don't think I have seen this style of putting them in the middle of the class.



##########
api/src/test/java/org/apache/iceberg/types/TestComparators.java:
##########
@@ -100,6 +100,35 @@ public void testUuid() {
         Comparators.forType(Types.UUIDType.get()),
         UUID.fromString("81873e7d-1374-4493-8e1d-9095eff7046c"),
         UUID.fromString("fd02441d-1423-4a3f-8785-c7dd5647e26b"));
+    assertComparesCorrectly(

Review Comment:
   do we also need to add coverage for the `useSignedUuid` variation?



##########
api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java:
##########
@@ -1138,4 +1154,322 @@ public void testNotInWithSingleValue() {
         .as("Should read: file has NaN values which match NOT IN predicate")
         .isTrue();
   }
+
+  @Test
+  public void testUuidEq() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, equal("uuid", 
belowMin)).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuid below lower 
bound").isFalse();
+
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, equal("uuid", 
UUID_MIN_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to lower 
bound").isTrue();
+
+    UUID between = UUID.fromString("7fffffff-ffff-ffff-7fff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, equal("uuid", 
between)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, equal("uuid", 
UUID_MAX_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to upper 
bound").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, equal("uuid", 
aboveMax)).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuid above upper 
bound").isFalse();
+  }
+
+  @Test
+  public void testUuidLt() {
+    // With RFC comparison, belowMin is below the lower bound so no rows can 
be < belowMin.
+    // With signed comparison, UUID_MIN_VALUE.compareTo(belowMin) = -1 (lower 
< lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, lessThan("uuid", 
belowMin)).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+
+    // UUID_MIN_VALUE is the lower bound, so no rows can be < UUID_MIN_VALUE.
+    // Both RFC and signed comparators agree on this since we're comparing the 
value to itself.
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, lessThan("uuid", 
UUID_MIN_VALUE)).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: uuid range below lower bound (UUID_MIN is not < 
UUID_MIN)")
+        .isFalse();
+
+    UUID justAboveMin = 
UUID.fromString("00000000-0000-0001-0000-000000000000");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, lessThan("uuid", 
justAboveMin)).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, lessThan("uuid", 
UUID_MAX_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, lessThan("uuid", 
aboveMax)).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+  }
+
+  @Test
+  public void testUuidLtEq() {
+    // With RFC comparison, belowMin is below the lower bound so no rows can 
be <= belowMin.
+    // However, we also try signed comparison for backward compatibility, and 
with signed comparison
+    // the bounds are inverted, so rows might match.
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, lessThanOrEqual("uuid", 
belowMin)).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback may find matches with inverted 
bounds")
+        .isTrue();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, lessThanOrEqual("uuid", 
UUID_MIN_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, lessThanOrEqual("uuid", 
UUID_MAX_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, lessThanOrEqual("uuid", 
aboveMax)).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+  }
+
+  @Test
+  public void testUuidGt() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThan("uuid", 
belowMin)).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThan("uuid", 
UUID_MIN_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    UUID justBelowMax = 
UUID.fromString("ffffffff-ffff-fffe-ffff-ffffffffffff");
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThan("uuid", 
justBelowMax)).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    // UUID_MAX_VALUE is the upper bound, so no rows can be > UUID_MAX_VALUE.
+    // Both RFC and signed comparators agree on this since we're comparing the 
value to itself.
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThan("uuid", 
UUID_MAX_VALUE)).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: uuid range above upper bound (UUID_MAX is not > 
UUID_MAX)")
+        .isFalse();
+
+    // With RFC comparison, aboveMax is above the upper bound so no rows can 
be > aboveMax.
+    // With signed comparison, UUID_MAX_VALUE.compareTo(aboveMax) = 1 (upper > 
lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThan("uuid", 
aboveMax)).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+  }
+
+  @Test
+  public void testUuidGtEq() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThanOrEqual("uuid", 
belowMin)).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThanOrEqual("uuid", 
UUID_MIN_VALUE))
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThanOrEqual("uuid", 
UUID_MAX_VALUE))
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    // With RFC comparison, aboveMax is above the upper bound so no rows can 
be >= aboveMax.
+    // With signed comparison, UUID_MAX_VALUE.compareTo(aboveMax) = 1 (upper > 
lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, greaterThanOrEqual("uuid", 
aboveMax)).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+  }
+
+  @Test
+  public void testUuidIn() {
+    UUID belowMin1 = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    UUID belowMin2 = UUID.fromString("00000000-0000-0000-0000-000000000001");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, in("uuid", belowMin1, 
belowMin2)).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuids below lower 
bound").isFalse();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, in("uuid", belowMin1, 
UUID_MIN_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to lower 
bound").isTrue();
+
+    UUID middle1 = UUID.fromString("7fffffff-ffff-ffff-0000-000000000000");
+    UUID middle2 = UUID.fromString("7fffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, in("uuid", middle1, 
middle2)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuids between lower and upper 
bounds").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, in("uuid", UUID_MAX_VALUE, 
aboveMax)).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to upper 
bound").isTrue();
+
+    UUID aboveMax2 = UUID.fromString("ffffffff-ffff-ffff-ffff-fffffffffffe");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, in("uuid", aboveMax, 
aboveMax2)).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuids above upper 
bound").isFalse();
+  }
+
+  @Test
+  public void testUuidNotEq() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("uuid", 
belowMin)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notEqual always reads").isTrue();
+
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notEqual("uuid", 
UUID_MIN_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notEqual always reads").isTrue();
+
+    UUID middle = UUID.fromString("7fffffff-ffff-ffff-7fff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notEqual("uuid", 
middle)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notEqual always reads").isTrue();
+
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notEqual("uuid", 
UUID_MAX_VALUE)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notEqual always reads").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notEqual("uuid", 
aboveMax)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notEqual always reads").isTrue();
+  }
+
+  @Test
+  public void testUuidNotIn() {
+    UUID belowMin1 = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    UUID belowMin2 = UUID.fromString("00000000-0000-0000-0000-000000000001");
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("uuid", belowMin1, 
belowMin2)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notIn always reads").isTrue();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("uuid", UUID_MIN_VALUE, 
UUID_MAX_VALUE))
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: notIn always reads").isTrue();
+
+    UUID middle1 = UUID.fromString("7fffffff-ffff-ffff-0000-000000000000");
+    UUID middle2 = UUID.fromString("7fffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = new InclusiveMetricsEvaluator(SCHEMA, notIn("uuid", middle1, 
middle2)).eval(FILE);
+    assertThat(shouldRead).as("Should read: notIn always reads").isTrue();
+  }
+
+  // Tests for legacy UUID file compatibility (files written with signed UUID 
comparator)

Review Comment:
   similar comment: move the static constructs near the top of the class



##########
api/src/test/java/org/apache/iceberg/expressions/TestInclusiveManifestEvaluator.java:
##########
@@ -853,4 +869,335 @@ public void testNotInWithSingleValue() {
         .as("Should not read: manifest contains single float value with no 
NaNs")
         .isFalse();
   }
+
+  @Test
+  public void testUuidEq() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(equal("uuid", belowMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuid below lower 
bound").isFalse();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(equal("uuid", UUID_MIN_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to lower 
bound").isTrue();
+
+    UUID between = UUID.fromString("7fffffff-ffff-ffff-7fff-ffffffffffff");
+    shouldRead = ManifestEvaluator.forRowFilter(equal("uuid", between), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(equal("uuid", UUID_MAX_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid equal to upper 
bound").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = ManifestEvaluator.forRowFilter(equal("uuid", aboveMax), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should not read: uuid above upper 
bound").isFalse();
+  }
+
+  @Test
+  public void testUuidLt() {
+    // With RFC comparison, belowMin is below the lower bound so no rows can 
be < belowMin.
+    // With signed comparison, UUID_MIN_VALUE.compareTo(belowMin) = -1 (lower 
< lit),
+    // so rows might match. We try both comparators and return true if either 
matches.
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", belowMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+
+    // UUID_MIN_VALUE is the lower bound, so no rows can be < UUID_MIN_VALUE.
+    // Both RFC and signed comparators agree on this since we're comparing the 
value to itself.
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", UUID_MIN_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should not read: uuid range below lower bound (UUID_MIN is not < 
UUID_MIN)")
+        .isFalse();
+
+    UUID justAboveMin = 
UUID.fromString("00000000-0000-0001-0000-000000000000");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", justAboveMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThan("uuid", UUID_MAX_VALUE), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead = ManifestEvaluator.forRowFilter(lessThan("uuid", aboveMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+  }
+
+  @Test
+  public void testUuidLtEq() {
+    // With RFC comparison, belowMin is below the lower bound so no rows can 
be <= belowMin.
+    // With signed comparison, the bounds are inverted, so rows might match.
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", belowMin), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead)
+        .as("Should read: signed UUID fallback finds matches with inverted 
bounds")
+        .isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", 
UUID_MIN_VALUE), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", 
UUID_MAX_VALUE), SPEC, true)
+            .eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(lessThanOrEqual("uuid", aboveMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+  }
+
+  @Test
+  public void testUuidGt() {
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", belowMin), SPEC, 
true).eval(FILE);
+    assertThat(shouldRead).as("Should read: all uuids in range").isTrue();
+
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", UUID_MIN_VALUE), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: uuid between lower and upper 
bounds").isTrue();
+
+    UUID justBelowMax = 
UUID.fromString("ffffffff-ffff-fffe-ffff-ffffffffffff");
+    shouldRead =
+        ManifestEvaluator.forRowFilter(greaterThan("uuid", justBelowMax), 
SPEC, true).eval(FILE);
+    assertThat(shouldRead).as("Should read: one possible uuid").isTrue();

Review Comment:
   nit: why is the error message not the same as the one above?
   ```
   Should read: uuid between lower and upper bounds
   ```



##########
api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java:
##########
@@ -684,4 +733,111 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", 
INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, 
notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not 
match").isFalse();
   }
+
+  // Tests for UUID with StrictMetricsEvaluator using RFC-compliant comparison 
only
+
+  @Test
+  public void testStrictUuidGt() {

Review Comment:
   should we test other scenarios like MIN_VALUE, middle, MAX_VALUE, aboveMax?



##########
api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java:
##########
@@ -684,4 +733,111 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", 
INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, 
notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not 
match").isFalse();
   }
+
+  // Tests for UUID with StrictMetricsEvaluator using RFC-compliant comparison 
only
+
+  @Test
+  public void testStrictUuidGt() {
+    // Query: uuid > 0x00... (all UUIDs in file should be > this)
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, greaterThan("uuid", 
belowMin)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be > belowMin
+    assertThat(allMatch).as("All UUIDs should be greater than 
belowMin").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidLt() {

Review Comment:
   similar comment as above about testing other scenarios



##########
data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java:
##########
@@ -1138,6 +1149,139 @@ public void testUUID() {
         .isTrue();
   }
 
+  /**
+   * Tests UUID filtering with values that span the signed/unsigned comparison 
boundary. In RFC 9562
+   * unsigned comparison: UUID_LOW < UUID_MID < UUID_HIGH < UUID_HIGHER In 
legacy signed comparison:

Review Comment:
   nit: missing a `.` after `UUID_HIGHER`



##########
data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java:
##########
@@ -1138,6 +1149,139 @@ public void testUUID() {
         .isTrue();
   }
 
+  /**
+   * Tests UUID filtering with values that span the signed/unsigned comparison 
boundary. In RFC 9562
+   * unsigned comparison: UUID_LOW < UUID_MID < UUID_HIGH < UUID_HIGHER In 
legacy signed comparison:
+   * UUID_HIGH < UUID_HIGHER < UUID_LOW < UUID_MID (high bit treated as 
negative)
+   *
+   * <p>The dual-comparator approach ensures we don't incorrectly skip files 
that might contain
+   * matching rows when reading legacy files with inverted UUID bounds.
+   */
+  @TestTemplate
+  public void testUUIDWithHighBitValues() throws IOException {
+    assumeThat(format).as("Only valid for 
Parquet").isEqualTo(FileFormat.PARQUET);
+
+    // Create a file with UUIDs spanning the signed/unsigned boundary
+    List<GenericRecord> records = Lists.newArrayList();
+    UUID[] uuids = {UUID_LOW, UUID_MID, UUID_HIGH, UUID_HIGHER};
+    for (int i = 0; i < uuids.length; i++) {
+      GenericRecord record = GenericRecord.create(UUID_SCHEMA);
+      record.setField("id", i);
+      record.setField("uuid_col", uuids[i]);
+      records.add(record);
+    }
+
+    File parquetFile = writeParquetFile("uuid-high-bit-test", UUID_SCHEMA, 
records);
+    InputFile inFile = Files.localInput(parquetFile);
+    try (ParquetFileReader reader = 
ParquetFileReader.open(parquetInputFile(inFile))) {
+      BlockMetaData blockMetaData = reader.getRowGroups().get(0);
+      MessageType fileSchema = reader.getFileMetaData().getSchema();
+
+      // Test equality - should find exact matches
+      assertThat(shouldReadUUID(equal("uuid_col", UUID_HIGH), fileSchema, 
blockMetaData))
+          .as("Should read: file contains UUID_HIGH")
+          .isTrue();
+
+      // Test less than with high-bit UUID
+      // Query: uuid < UUID_HIGH (should match UUID_LOW and UUID_MID in RFC 
order)
+      assertThat(shouldReadUUID(lessThan("uuid_col", UUID_HIGH), fileSchema, 
blockMetaData))
+          .as("Should read: file contains values less than UUID_HIGH")
+          .isTrue();
+
+      // Test greater than with low UUID
+      // Query: uuid > UUID_LOW (should match UUID_MID, UUID_HIGH, UUID_HIGHER 
in RFC order)
+      assertThat(shouldReadUUID(greaterThan("uuid_col", UUID_LOW), fileSchema, 
blockMetaData))
+          .as("Should read: file contains values greater than UUID_LOW")
+          .isTrue();
+
+      // Test greater than with highest UUID - should not match any
+      UUID aboveAll = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+      assertThat(shouldReadUUID(greaterThan("uuid_col", aboveAll), fileSchema, 
blockMetaData))
+          .as("Should skip: no values greater than max UUID")
+          .isFalse();
+
+      // Test less than with lowest UUID - should not match any
+      UUID belowAll = UUID.fromString("00000000-0000-0000-0000-000000000000");
+      assertThat(shouldReadUUID(lessThan("uuid_col", belowAll), fileSchema, 
blockMetaData))
+          .as("Should skip: no values less than min UUID")
+          .isFalse();
+
+      // Test IN with high-bit UUIDs
+      assertThat(shouldReadUUID(in("uuid_col", UUID_HIGH, UUID_HIGHER), 
fileSchema, blockMetaData))
+          .as("Should read: file contains one of the IN values")
+          .isTrue();
+
+      // Test IN with UUID outside file bounds (above max)
+      // The file's max is UUID_HIGHER (0xc0...), so 0xff... is outside bounds
+      UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+      assertThat(shouldReadUUID(in("uuid_col", aboveMax), fileSchema, 
blockMetaData))
+          .as("Should skip: value is above file's max bound")
+          .isFalse();
+
+      // Test NOT IN - should read when file contains values not in the set
+      assertThat(shouldReadUUID(notIn("uuid_col", UUID_LOW), fileSchema, 
blockMetaData))
+          .as("Should read: file contains values not in the exclusion set")
+          .isTrue();
+    }
+  }
+
+  /**
+   * Tests that the dual-comparator approach correctly handles queries that 
would give different
+   * results with signed vs unsigned comparison. This is critical for backward 
compatibility with
+   * legacy files that may have UUID bounds computed with signed comparison.
+   */
+  @TestTemplate
+  public void testUUIDComparisonBoundary() throws IOException {
+    assumeThat(format).as("Only valid for 
Parquet").isEqualTo(FileFormat.PARQUET);
+
+    // Create a file with only high-bit UUIDs (0x80... and above)
+    // These are the UUIDs that would be ordered differently in signed vs 
unsigned comparison
+    List<GenericRecord> records = Lists.newArrayList();
+    UUID[] highBitUuids = {UUID_HIGH, UUID_HIGHER};
+    for (int i = 0; i < highBitUuids.length; i++) {
+      GenericRecord record = GenericRecord.create(UUID_SCHEMA);
+      record.setField("id", i);
+      record.setField("uuid_col", highBitUuids[i]);
+      records.add(record);
+    }
+
+    File parquetFile = writeParquetFile("uuid-high-only-test", UUID_SCHEMA, 
records);

Review Comment:
   Wouldn't this still generate Parquet column chunk stats using the unsigned 
comparator?
   
   Can we generate a Parquet file using the signed comparator, to test Parquet 
files written in the legacy way?



##########
parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java:
##########
@@ -80,15 +103,20 @@ private class MetricsEvalVisitor extends 
BoundExpressionVisitor<Boolean> {
     private Map<Integer, Statistics<?>> stats = null;
     private Map<Integer, Long> valueCounts = null;
     private Map<Integer, Function<Object, Object>> conversions = null;
+    // Flag to use signed UUID comparator for backward compatibility.
+    // This is needed for the IN predicate because the comparator information 
is lost
+    // when binding converts literals to a Set<T> of raw values.
+    private boolean useSignedUuidComparator = false;
 
-    private boolean eval(MessageType fileSchema, BlockMetaData rowGroup) {
+    private boolean eval(MessageType fileSchema, BlockMetaData rowGroup, 
boolean signedUuidMode) {

Review Comment:
   maybe also make this class static and pass in the expression as an arg, like 
the `InclusiveMetricsEvaluator`?



##########
api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java:
##########
@@ -684,4 +733,111 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", 
INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, 
notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not 
match").isFalse();
   }
+
+  // Tests for UUID with StrictMetricsEvaluator using RFC-compliant comparison 
only
+
+  @Test
+  public void testStrictUuidGt() {
+    // Query: uuid > 0x00... (all UUIDs in file should be > this)
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, greaterThan("uuid", 
belowMin)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be > belowMin
+    assertThat(allMatch).as("All UUIDs should be greater than 
belowMin").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidLt() {
+    // Query: uuid < 0xFF... (all UUIDs in file should be < this)
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, lessThan("uuid", 
aboveMax)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be < aboveMax
+    assertThat(allMatch).as("All UUIDs should be less than aboveMax").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidEqNeverMatchesRange() {
+    // Strict eq should never match when there's a range of values
+    UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    boolean allMatch = new StrictMetricsEvaluator(SCHEMA, equal("uuid", 
middle)).eval(UUID_FILE);
+    assertThat(allMatch).as("Strict eq should not match range").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidInNeverMatchesRange() {
+    // Strict IN should never match when there's a range of values (lower != 
upper)
+    UUID middle1 = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    UUID middle2 = UUID.fromString("50000000-0000-0000-0000-000000000001");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", middle1, 
middle2)).eval(UUID_FILE);
+    assertThat(allMatch).as("Strict IN should not match range").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidInMatchesSingleValue() {
+    // Strict IN should match when lower == upper and the value is in the set
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", 
SINGLE_UUID)).eval(SINGLE_UUID_FILE);
+    assertThat(allMatch).as("Strict IN should match single value in 
set").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidInDoesNotMatchWhenValueNotInSet() {
+    // Strict IN should not match when lower == upper but the value is not in 
the set
+    UUID otherUuid = UUID.fromString("50000000-0000-0000-0000-000000000001");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", 
otherUuid)).eval(SINGLE_UUID_FILE);
+    assertThat(allMatch).as("Strict IN should not match when value not in 
set").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidNotInMatchesWhenAllValuesOutsideBounds() {
+    // Strict NOT IN should match when all values in the set are outside the 
file's bounds
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, notIn("uuid", belowMin, 
aboveMax)).eval(UUID_FILE);
+    // All values in the set are outside [UUID_MIN, UUID_MAX], so all rows 
match NOT IN
+    assertThat(allMatch).as("Strict NOT IN should match when all values 
outside bounds").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidNotInDoesNotMatchWhenValueInBounds() {
+    // Strict NOT IN should not match when a value in the set is within bounds
+    UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    boolean allMatch = new StrictMetricsEvaluator(SCHEMA, notIn("uuid", 
middle)).eval(UUID_FILE);
+    // middle is within [UUID_MIN, UUID_MAX], so some rows might match the 
value
+    assertThat(allMatch).as("Strict NOT IN should not match when value in 
bounds").isFalse();
+  }
+
+  // Tests for file with inverted UUID bounds (as would be written by legacy 
signed comparator)
+
+  @Test
+  public void testStrictUuidInWithLegacyInvertedBounds() {
+    // With inverted bounds [0x80..., 0x40...] where lower > upper in RFC 
order,

Review Comment:
   we don't need to perform the dual check (signed and unsigned) for 
`StrictMetricsEvaluator`?



##########
api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java:
##########
@@ -684,4 +733,111 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", 
INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, 
notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not 
match").isFalse();
   }
+
+  // Tests for UUID with StrictMetricsEvaluator using RFC-compliant comparison 
only
+
+  @Test
+  public void testStrictUuidGt() {
+    // Query: uuid > 0x00... (all UUIDs in file should be > this)
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, greaterThan("uuid", 
belowMin)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be > belowMin
+    assertThat(allMatch).as("All UUIDs should be greater than 
belowMin").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidLt() {
+    // Query: uuid < 0xFF... (all UUIDs in file should be < this)
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, lessThan("uuid", 
aboveMax)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be < aboveMax
+    assertThat(allMatch).as("All UUIDs should be less than aboveMax").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidEqNeverMatchesRange() {
+    // Strict eq should never match when there's a range of values
+    UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    boolean allMatch = new StrictMetricsEvaluator(SCHEMA, equal("uuid", 
middle)).eval(UUID_FILE);
+    assertThat(allMatch).as("Strict eq should not match range").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidInNeverMatchesRange() {
+    // Strict IN should never match when there's a range of values (lower != 
upper)
+    UUID middle1 = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    UUID middle2 = UUID.fromString("50000000-0000-0000-0000-000000000001");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", middle1, 
middle2)).eval(UUID_FILE);
+    assertThat(allMatch).as("Strict IN should not match range").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidInMatchesSingleValue() {
+    // Strict IN should match when lower == upper and the value is in the set
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", 
SINGLE_UUID)).eval(SINGLE_UUID_FILE);
+    assertThat(allMatch).as("Strict IN should match single value in 
set").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidInDoesNotMatchWhenValueNotInSet() {
+    // Strict IN should not match when lower == upper but the value is not in 
the set
+    UUID otherUuid = UUID.fromString("50000000-0000-0000-0000-000000000001");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", 
otherUuid)).eval(SINGLE_UUID_FILE);
+    assertThat(allMatch).as("Strict IN should not match when value not in 
set").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidNotInMatchesWhenAllValuesOutsideBounds() {
+    // Strict NOT IN should match when all values in the set are outside the 
file's bounds
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, notIn("uuid", belowMin, 
aboveMax)).eval(UUID_FILE);
+    // All values in the set are outside [UUID_MIN, UUID_MAX], so all rows 
match NOT IN
+    assertThat(allMatch).as("Strict NOT IN should match when all values 
outside bounds").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidNotInDoesNotMatchWhenValueInBounds() {
+    // Strict NOT IN should not match when a value in the set is within bounds
+    UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    boolean allMatch = new StrictMetricsEvaluator(SCHEMA, notIn("uuid", 
middle)).eval(UUID_FILE);
+    // middle is within [UUID_MIN, UUID_MAX], so some rows might match the 
value
+    assertThat(allMatch).as("Strict NOT IN should not match when value in 
bounds").isFalse();
+  }
+
+  // Tests for file with inverted UUID bounds (as would be written by legacy 
signed comparator)

Review Comment:
   nit: inverted -> signed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to