This is an automated email from the ASF dual-hosted git repository.

singhpk234 pushed a commit to branch feature/serialize-bound-expression
in repository https://gitbox.apache.org/repos/asf/iceberg.git

commit a2499368221549cf6596a2bcd8cda14e03ea0ae6
Author: Prashant Kumar Singh <[email protected]>
AuthorDate: Wed Oct 22 19:20:09 2025 +0000

    more cleanup
---
 .../apache/iceberg/expressions/NamedReference.java |   2 +
 .../iceberg/expressions/UnboundTransform.java      |  11 +--
 .../iceberg/expressions/ExpressionParser.java      |  82 +++++++++++++---
 .../TestExpressionParserWithResolvedReference.java | 107 ++++++++++++++++-----
 4 files changed, 155 insertions(+), 47 deletions(-)

diff --git 
a/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java 
b/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java
index c2d83bc4df..b435e2bed5 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java
@@ -38,6 +38,8 @@ public class NamedReference<T> implements UnboundTerm<T>, 
Reference<T> {
 
   @Override
   public BoundReference<T> bind(Types.StructType struct, boolean 
caseSensitive) {
+    ValidationException.check(name() != null, "Name cannot be null");
+
     Schema schema = struct.asSchema();
     Types.NestedField field =
         caseSensitive ? schema.findField(name()) : 
schema.caseInsensitiveFindField(name());
diff --git 
a/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java 
b/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java
index 27059d1d21..30f03e4a9e 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java
@@ -36,17 +36,13 @@ public class UnboundTransform<S, T> implements 
UnboundTerm<T>, Term {
     return ref;
   }
 
-  public NamedReference<S> unboundRef() {
-    return ref;
-  }
-
   public Transform<S, T> transform() {
     return transform;
   }
 
   @Override
   public BoundTransform<S, T> bind(Types.StructType struct, boolean 
caseSensitive) {
-    BoundReference<S> boundRef = (BoundReference<S>) ref.bind(struct, 
caseSensitive);
+    BoundReference<S> boundRef = ref.bind(struct, caseSensitive);
 
     try {
       ValidationException.check(
@@ -54,11 +50,10 @@ public class UnboundTransform<S, T> implements 
UnboundTerm<T>, Term {
           "Cannot bind: %s cannot transform %s values from '%s'",
           transform,
           boundRef.type(),
-          ref.toString());
+          ref);
     } catch (IllegalArgumentException e) {
       throw new ValidationException(
-          "Cannot bind: %s cannot transform %s values from '%s'",
-          transform, boundRef.type(), ref.name());
+          "Cannot bind: %s cannot transform %s values from '%s'", transform, 
boundRef.type(), ref);
     }
 
     return new BoundTransform<>(boundRef, transform);
diff --git 
a/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java 
b/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java
index e68a71c4a7..6aea3c6e60 100644
--- a/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java
+++ b/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java
@@ -34,6 +34,7 @@ import org.apache.iceberg.SingleValueParser;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+import org.apache.iceberg.transforms.Transform;
 import org.apache.iceberg.transforms.Transforms;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.JsonUtil;
@@ -51,6 +52,11 @@ public class ExpressionParser {
   private static final String REFERENCE = "reference";
   private static final String LITERAL = "literal";
 
+  public enum SerializationMode {
+    NAMES_ONLY,
+    WITH_FIELD_IDS
+  }
+
   private ExpressionParser() {}
 
   public static String toJson(Expression expression) {
@@ -62,27 +68,45 @@ public class ExpressionParser {
     return JsonUtil.generate(gen -> toJson(expression, gen), pretty);
   }
 
+  public static String toJson(Expression expression, boolean pretty, 
SerializationMode mode) {
+    Preconditions.checkArgument(expression != null, "Invalid expression: 
null");
+    return JsonUtil.generate(gen -> toJson(expression, gen, mode), pretty);
+  }
+
+  @Deprecated
   public static String toJson(Expression expression, boolean pretty, boolean 
includeFieldIds) {
     Preconditions.checkArgument(expression != null, "Invalid expression: 
null");
-    return JsonUtil.generate(gen -> toJson(expression, gen, includeFieldIds), 
pretty);
+    return toJson(
+        expression,
+        pretty,
+        includeFieldIds ? SerializationMode.WITH_FIELD_IDS : 
SerializationMode.NAMES_ONLY);
   }
 
   public static void toJson(Expression expression, JsonGenerator gen) {
-    ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, false));
+    ExpressionVisitors.visit(
+        expression, new JsonGeneratorVisitor(gen, 
SerializationMode.NAMES_ONLY));
+  }
+
+  public static void toJson(Expression expression, JsonGenerator gen, 
SerializationMode mode) {
+    ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, mode));
   }
 
+  @Deprecated
   public static void toJson(Expression expression, JsonGenerator gen, boolean 
includeFieldIds) {
-    ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, 
includeFieldIds));
+    toJson(
+        expression,
+        gen,
+        includeFieldIds ? SerializationMode.WITH_FIELD_IDS : 
SerializationMode.NAMES_ONLY);
   }
 
   private static class JsonGeneratorVisitor
       extends ExpressionVisitors.CustomOrderExpressionVisitor<Void> {
     private final JsonGenerator gen;
-    private final boolean includeFieldIds;
+    private final SerializationMode mode;
 
-    private JsonGeneratorVisitor(JsonGenerator gen, boolean includeFieldIds) {
+    private JsonGeneratorVisitor(JsonGenerator gen, SerializationMode mode) {
       this.gen = gen;
-      this.includeFieldIds = includeFieldIds;
+      this.mode = mode;
     }
 
     /**
@@ -251,7 +275,7 @@ public class ExpressionParser {
         return;
       } else if (term instanceof BoundTransform) {
         BoundTransform<?, ?> transform = (BoundTransform<?, ?>) term;
-        if (includeFieldIds) {
+        if (mode == SerializationMode.WITH_FIELD_IDS) {
           transformWithFieldId(
               transform.transform().toString(), transform.ref().name(), 
transform.ref().fieldId());
         } else {
@@ -260,7 +284,7 @@ public class ExpressionParser {
         return;
       } else if (term instanceof BoundReference) {
         BoundReference<?> ref = (BoundReference<?>) term;
-        if (includeFieldIds) {
+        if (mode == SerializationMode.WITH_FIELD_IDS) {
           referenceWithFieldId(ref.name(), ref.fieldId());
         } else {
           gen.writeString(ref.name());
@@ -295,8 +319,11 @@ public class ExpressionParser {
 
     private void referenceWithFieldId(String name, int fieldId) throws 
IOException {
       gen.writeStartObject();
-      gen.writeStringField("type", "ref");
-      gen.writeStringField("name", name);
+      gen.writeStringField("type", REFERENCE);
+      // Only write name if not null (support fieldId-only case)
+      if (name != null) {
+        gen.writeStringField("name", name);
+      }
       gen.writeNumberField("fieldId", fieldId);
       gen.writeEndObject();
     }
@@ -451,12 +478,39 @@ public class ExpressionParser {
       String type = JsonUtil.getString(TYPE, node);
       switch (type) {
         case REFERENCE:
-          return Expressions.ref(JsonUtil.getString(TERM, node));
+          // Reference must have a name (via "name" or "term")
+          // fieldId is optional - if present, creates ResolvedReference, 
otherwise NamedReference
+          boolean hasTerm = node.has(TERM);
+          boolean hasName = node.has("name");
+          boolean hasFieldId = node.has("fieldId");
+
+          String name =
+              hasName
+                  ? JsonUtil.getString("name", node)
+                  : hasTerm ? JsonUtil.getString(TERM, node) : null;
+
+          if (name == null) {
+            throw new IllegalArgumentException(
+                "REFERENCE must have a name (via 'name' or 'term' field): " + 
node);
+          }
+
+          if (hasFieldId) {
+            int fieldId = JsonUtil.getInt("fieldId", node);
+            return Expressions.ref(name, fieldId);
+          } else {
+            return Expressions.ref(name);
+          }
         case TRANSFORM:
           UnboundTerm<T> child = term(JsonUtil.get(TERM, node));
-          String transform = JsonUtil.getString(TRANSFORM, node);
-          return (UnboundTerm<T>)
-              Expressions.transform(child.ref().name(), 
Transforms.fromString(transform));
+          String transformStr = JsonUtil.getString(TRANSFORM, node);
+          NamedReference<?> ref;
+          if (child instanceof NamedReference) {
+            ref = (NamedReference<?>) child;
+          } else {
+            ref = child.ref();
+          }
+          Transform<?, ?> transform = Transforms.fromString(transformStr);
+          return (UnboundTerm<T>) new UnboundTransform(ref, transform);
         default:
           throw new IllegalArgumentException("Cannot parse type as a 
reference: " + type);
       }
diff --git 
a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java
 
b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java
index ff04145917..bff158a2a1 100644
--- 
a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java
+++ 
b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java
@@ -25,6 +25,7 @@ import static org.assertj.core.api.Assertions.assertThat;
 import java.math.BigDecimal;
 import java.util.UUID;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.expressions.ExpressionParser.SerializationMode;
 import org.apache.iceberg.types.Types;
 import org.junit.jupiter.api.Test;
 
@@ -499,8 +500,9 @@ public class TestExpressionParserWithResolvedReference {
     assertThat(jsonWithoutFieldIds).doesNotContain("fieldId");
 
     // Test serialization with field IDs (new behavior)
-    String jsonWithFieldIds = ExpressionParser.toJson(boundSimple, true, true);
-    assertThat(jsonWithFieldIds).contains("\"type\" : \"ref\"");
+    String jsonWithFieldIds =
+        ExpressionParser.toJson(boundSimple, true, 
SerializationMode.WITH_FIELD_IDS);
+    assertThat(jsonWithFieldIds).contains("\"type\" : \"reference\"");
     assertThat(jsonWithFieldIds).contains("\"name\" : \"id\"");
     assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100");
 
@@ -511,7 +513,8 @@ public class TestExpressionParserWithResolvedReference {
             Expressions.greaterThan(Expressions.ref("id", 100), 50L));
     Expression boundComplex = Binder.bind(STRUCT_TYPE, complexExpr, true);
 
-    String complexJsonWithFieldIds = ExpressionParser.toJson(boundComplex, 
true, true);
+    String complexJsonWithFieldIds =
+        ExpressionParser.toJson(boundComplex, true, 
SerializationMode.WITH_FIELD_IDS);
     assertThat(complexJsonWithFieldIds).contains("\"fieldId\" : 100");
     assertThat(complexJsonWithFieldIds).contains("\"fieldId\" : 101");
     assertThat(complexJsonWithFieldIds).contains("\"name\" : \"id\"");
@@ -539,15 +542,17 @@ public class TestExpressionParserWithResolvedReference {
     assertThat(bucketJsonNoFieldIds).doesNotContain("fieldId");
 
     // Test serialization with field IDs (new behavior)
-    String bucketJsonWithFieldIds = ExpressionParser.toJson(boundBucket, true, 
true);
+    String bucketJsonWithFieldIds =
+        ExpressionParser.toJson(boundBucket, true, 
SerializationMode.WITH_FIELD_IDS);
     assertThat(bucketJsonWithFieldIds).contains("\"transform\" : 
\"bucket[8]\"");
-    assertThat(bucketJsonWithFieldIds).contains("\"type\" : \"ref\"");
+    assertThat(bucketJsonWithFieldIds).contains("\"type\" : \"reference\"");
     assertThat(bucketJsonWithFieldIds).contains("\"name\" : \"id\"");
     assertThat(bucketJsonWithFieldIds).contains("\"fieldId\" : 100");
 
-    String dayJsonWithFieldIds = ExpressionParser.toJson(boundDay, true, true);
+    String dayJsonWithFieldIds =
+        ExpressionParser.toJson(boundDay, true, 
SerializationMode.WITH_FIELD_IDS);
     assertThat(dayJsonWithFieldIds).contains("\"transform\" : \"day\"");
-    assertThat(dayJsonWithFieldIds).contains("\"type\" : \"ref\"");
+    assertThat(dayJsonWithFieldIds).contains("\"type\" : \"reference\"");
     assertThat(dayJsonWithFieldIds).contains("\"name\" : \"date\"");
     assertThat(dayJsonWithFieldIds).contains("\"fieldId\" : 107");
   }
@@ -568,7 +573,8 @@ public class TestExpressionParserWithResolvedReference {
     Expression bound = Binder.bind(STRUCT_TYPE, complexExpr, true);
 
     // Test serialization with field IDs
-    String jsonWithFieldIds = ExpressionParser.toJson(bound, true, true);
+    String jsonWithFieldIds =
+        ExpressionParser.toJson(bound, true, SerializationMode.WITH_FIELD_IDS);
 
     // Verify all field IDs are present
     assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); // id field
@@ -588,7 +594,8 @@ public class TestExpressionParserWithResolvedReference {
     assertThat(jsonWithFieldIds).contains("\"transform\" : \"truncate[4]\"");
 
     // Verify ResolvedReference structure for both transforms and direct 
references
-    // Count occurrences of "type" : "ref" to ensure all references use 
ResolvedReference format
+    // Count occurrences of "type" : "reference" to ensure all references use 
ResolvedReference
+    // format
     long refTypeCount =
         jsonWithFieldIds
             .lines()
@@ -596,7 +603,7 @@ public class TestExpressionParserWithResolvedReference {
                 line -> {
                   int count = 0;
                   int index = 0;
-                  String pattern = "\"type\" : \"ref\"";
+                  String pattern = "\"type\" : \"reference\"";
                   while ((index = line.indexOf(pattern, index)) != -1) {
                     count++;
                     index += pattern.length();
@@ -610,7 +617,7 @@ public class TestExpressionParserWithResolvedReference {
 
   @Test
   public void testBoundExpressionRoundTripWithResolvedReference() {
-    // Test that bound expressions with ResolvedReference can be serialized 
and maintain information
+    // Test that bound expressions with ResolvedReference can be serialized 
and parsed back
 
     // Create expressions using both ResolvedTransform and ResolvedReference
     Expression originalExpr =
@@ -622,25 +629,75 @@ public class TestExpressionParserWithResolvedReference {
     Expression bound = Binder.bind(STRUCT_TYPE, originalExpr, true);
 
     // Serialize with field IDs
-    String jsonWithFieldIds = ExpressionParser.toJson(bound, true, true);
+    String jsonWithFieldIds =
+        ExpressionParser.toJson(bound, true, SerializationMode.WITH_FIELD_IDS);
 
     // Verify the JSON structure contains complete ResolvedReference 
information
-    assertThat(jsonWithFieldIds).contains("\"type\" : \"ref\"");
+    assertThat(jsonWithFieldIds).contains("\"type\" : \"reference\"");
     assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100");
     assertThat(jsonWithFieldIds).contains("\"fieldId\" : 101");
 
-    // Note: The current parser doesn't support parsing ResolvedReference 
format back to expressions
-    // This test documents the serialization capability for external systems 
that need field ID
-    // information
+    // Parse back from JSON with field IDs
+    Expression parsed = ExpressionParser.fromJson(jsonWithFieldIds, SCHEMA);
 
-    // Verify that the bound expression maintains the same field IDs after 
serialization
-    BoundPredicate<?> boundPred = (BoundPredicate<?>) ((And) bound).left();
-    BoundTransform<?, ?> boundTransform = (BoundTransform<?, ?>) 
boundPred.term();
-    assertThat(boundTransform.ref().fieldId()).isEqualTo(100);
+    // Verify parsed expression uses ResolvedReference
+    assertThat(parsed).isInstanceOf(And.class);
+    And parsedAnd = (And) parsed;
+
+    UnboundPredicate<?> leftPred = (UnboundPredicate<?>) parsedAnd.left();
+    assertThat(leftPred.term()).isInstanceOf(UnboundTransform.class);
+    UnboundTransform<?, ?> leftTransform = (UnboundTransform<?, ?>) 
leftPred.term();
+    assertThat(leftTransform.ref()).isInstanceOf(ResolvedReference.class);
+    assertThat(((ResolvedReference<?>) 
leftTransform.ref()).fieldId()).isEqualTo(100);
+
+    UnboundPredicate<?> rightPred = (UnboundPredicate<?>) parsedAnd.right();
+    assertThat(rightPred.term()).isInstanceOf(ResolvedReference.class);
+    ResolvedReference<?> rightRef = (ResolvedReference<?>) rightPred.term();
+    assertThat(rightRef.fieldId()).isEqualTo(101);
+    assertThat(rightRef.name()).isEqualTo("data");
+
+    // Verify that binding the parsed expression produces the same result
+    Expression parsedBound = Binder.bind(STRUCT_TYPE, parsed, true);
+    assertThat(parsedBound.toString()).isEqualTo(bound.toString());
+  }
+
+  @Test
+  public void testResolvedReferenceRoundTripWithFieldIds() {
+    // Test that ResolvedReference expressions with field IDs can be 
serialized and parsed back
+    Expression original = Expressions.equal(Expressions.ref("id", 100), 42L);
+
+    // Serialize without binding (preserves ResolvedReference)
+    String json = ExpressionParser.toJson(original, true);
 
-    BoundPredicate<?> boundDataPred = (BoundPredicate<?>) ((And) 
bound).right();
-    BoundReference<?> boundDataRef = (BoundReference<?>) boundDataPred.term();
-    assertThat(boundDataRef.fieldId()).isEqualTo(101);
+    // For unbound expressions, field IDs are not included in JSON by default
+    // We need to manually create JSON with field IDs to test parsing
+    String jsonWithFieldIds =
+        "{\n"
+            + "  \"type\" : \"eq\",\n"
+            + "  \"term\" : {\n"
+            + "    \"type\" : \"reference\",\n"
+            + "    \"term\" : \"id\",\n"
+            + "    \"fieldId\" : 100\n"
+            + "  },\n"
+            + "  \"value\" : 42\n"
+            + "}";
+
+    // Parse the JSON with field IDs
+    Expression parsed = ExpressionParser.fromJson(jsonWithFieldIds, SCHEMA);
+
+    // Verify it creates a ResolvedReference
+    assertThat(parsed).isInstanceOf(UnboundPredicate.class);
+    UnboundPredicate<?> predicate = (UnboundPredicate<?>) parsed;
+    assertThat(predicate.term()).isInstanceOf(ResolvedReference.class);
+
+    ResolvedReference<?> resolvedRef = (ResolvedReference<?>) predicate.term();
+    assertThat(resolvedRef.name()).isEqualTo("id");
+    assertThat(resolvedRef.fieldId()).isEqualTo(100);
+
+    // Verify equivalence after binding
+    Expression originalBound = Binder.bind(STRUCT_TYPE, original, true);
+    Expression parsedBound = Binder.bind(STRUCT_TYPE, parsed, true);
+    assertThat(parsedBound.toString()).isEqualTo(originalBound.toString());
   }
 
   @Test
@@ -666,7 +723,7 @@ public class TestExpressionParserWithResolvedReference {
       Expression bound = Binder.bind(STRUCT_TYPE, expr, true);
 
       // Serialize with field IDs
-      String json = ExpressionParser.toJson(bound, true, true);
+      String json = ExpressionParser.toJson(bound, true, 
SerializationMode.WITH_FIELD_IDS);
 
       // Extract expected field ID from the original ResolvedReference
       UnboundPredicate<?> unboundPred = (UnboundPredicate<?>) expr;
@@ -675,7 +732,7 @@ public class TestExpressionParserWithResolvedReference {
 
       // Verify the field ID is preserved in the JSON
       assertThat(json).contains("\"fieldId\" : " + expectedFieldId);
-      assertThat(json).contains("\"type\" : \"ref\"");
+      assertThat(json).contains("\"type\" : \"reference\"");
     }
   }
 }

Reply via email to