This is an automated email from the ASF dual-hosted git repository. singhpk234 pushed a commit to branch feature/serialize-bound-expression in repository https://gitbox.apache.org/repos/asf/iceberg.git
commit ba63fcf387e314afc84a99c87dc1d7608c284364 Author: Prashant Kumar Singh <[email protected]> AuthorDate: Fri Sep 26 02:43:32 2025 +0000 more changes --- .../apache/iceberg/expressions/ExpressionUtil.java | 2 + .../apache/iceberg/expressions/Expressions.java | 32 + .../iceberg/expressions/ResolvedReference.java | 17 + .../iceberg/expressions/ResolvedTransform.java | 88 +++ .../expressions/TestExpressionSerialization.java | 21 +- .../iceberg/expressions/TestResolvedReference.java | 132 ++++ .../iceberg/expressions/ExpressionParser.java | 55 +- .../TestEnhancedExpressionParserWithFieldIds.java | 433 +++++++++++ .../TestExpressionParserWithResolvedReference.java | 863 +++++++++++++++++++++ 9 files changed, 1639 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index d3dc00d914..6450321eda 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -232,6 +232,8 @@ public class ExpressionUtil { + ")"; } else if (term instanceof NamedReference) { return ((NamedReference<?>) term).name(); + } else if (term instanceof ResolvedReference) { + return ((ResolvedReference<?>) term).name(); } else if (term instanceof BoundReference) { return ((BoundReference<?>) term).name(); } else { diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java index 1e93d8e75e..658b296f63 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java @@ -102,6 +102,38 @@ public class Expressions { return new UnboundTransform<>(ref(name), Transforms.truncate(width)); } + @SuppressWarnings("unchecked") + public static <S, T> UnboundTerm<T> bucket(ResolvedReference<S> resolvedRef, int 
numBuckets) { + Transform<S, T> transform = (Transform<S, T>) Transforms.bucket(numBuckets); + return new ResolvedTransform<>(resolvedRef, transform); + } + + @SuppressWarnings("unchecked") + public static <S, T> UnboundTerm<T> year(ResolvedReference<S> resolvedRef) { + return new ResolvedTransform<>(resolvedRef, (Transform<S, T>) Transforms.year()); + } + + @SuppressWarnings("unchecked") + public static <S, T> UnboundTerm<T> month(ResolvedReference<S> resolvedRef) { + return new ResolvedTransform<>(resolvedRef, (Transform<S, T>) Transforms.month()); + } + + @SuppressWarnings("unchecked") + public static <S, T> UnboundTerm<T> day(ResolvedReference<S> resolvedRef) { + return new ResolvedTransform<>(resolvedRef, (Transform<S, T>) Transforms.day()); + } + + @SuppressWarnings("unchecked") + public static <S, T> UnboundTerm<T> hour(ResolvedReference<S> resolvedRef) { + return new ResolvedTransform<>(resolvedRef, (Transform<S, T>) Transforms.hour()); + } + + @SuppressWarnings("unchecked") + public static <S, T> UnboundTerm<T> truncate(ResolvedReference<S> resolvedRef, int width) { + Transform<S, T> transform = (Transform<S, T>) Transforms.truncate(width); + return new ResolvedTransform<>(resolvedRef, transform); + } + public static <T> UnboundTerm<T> extract(String name, String path, String type) { return new UnboundExtract<>(ref(name), path, type); } diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java b/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java index b12bff5832..ab272d352b 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java @@ -57,6 +57,23 @@ public class ResolvedReference<T> implements UnboundTerm<T>, Reference<T> { return new NamedReference<>(name); } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return 
false; + } + ResolvedReference<?> that = (ResolvedReference<?>) o; + return fieldId == that.fieldId && name.equals(that.name); + } + + @Override + public int hashCode() { + return 31 * fieldId + name.hashCode(); + } + @Override public String toString() { return String.format("ref(name=\"%s\", fieldId=\"%s\")", name, fieldId); diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResolvedTransform.java b/api/src/main/java/org/apache/iceberg/expressions/ResolvedTransform.java new file mode 100644 index 0000000000..287fc6d994 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/expressions/ResolvedTransform.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.expressions; + +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.transforms.Transform; +import org.apache.iceberg.types.Types; + +public class ResolvedTransform<S, T> implements UnboundTerm<T>, Term { + private final ResolvedReference<S> ref; + private final Transform<S, T> transform; + + ResolvedTransform(ResolvedReference<S> ref, Transform<S, T> transform) { + this.ref = ref; + this.transform = transform; + } + + @Override + public NamedReference<S> ref() { + return (NamedReference<S>) ref.ref(); + } + + public ResolvedReference<S> resolvedRef() { + return ref; + } + + public Transform<S, T> transform() { + return transform; + } + + @Override + public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) { + BoundReference<S> boundRef = (BoundReference<S>) ref.bind(struct, caseSensitive); + + try { + ValidationException.check( + transform.canTransform(boundRef.type()), + "Cannot bind: %s cannot transform %s values from '%s'", + transform, + boundRef.type(), + ref.name()); + } catch (IllegalArgumentException e) { + throw new ValidationException( + "Cannot bind: %s cannot transform %s values from '%s'", + transform, boundRef.type(), ref.name()); + } + + return new BoundTransform<>(boundRef, transform); + } + + @Override + public String toString() { + return transform + "(" + ref + ")"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ResolvedTransform<?, ?> that = (ResolvedTransform<?, ?>) o; + return ref.equals(that.ref) && transform.equals(that.transform); + } + + @Override + public int hashCode() { + return 31 * ref.hashCode() + transform.hashCode(); + } +} \ No newline at end of file diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java 
index fc7ddd035b..6968f8c1fe 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java @@ -61,7 +61,17 @@ public class TestExpressionSerialization { Expressions.notIn("s", "abc", "xyz").bind(schema.asStruct()), Expressions.isNull("a").bind(schema.asStruct()), Expressions.startsWith("s", "abc").bind(schema.asStruct()), - Expressions.notStartsWith("s", "xyz").bind(schema.asStruct()) + Expressions.notStartsWith("s", "xyz").bind(schema.asStruct()), + // ResolvedReference tests + Expressions.equal(Expressions.ref("a", 34), 5), + Expressions.in(Expressions.ref("s", 35), "abc", "xyz"), + Expressions.notNull(Expressions.ref("a", 34)), + Expressions.isNull(Expressions.ref("a", 34)), + Expressions.startsWith(Expressions.ref("s", 35), "test"), + // ResolvedReference bound tests + Expressions.equal(Expressions.ref("a", 34), 5).bind(schema.asStruct()), + Expressions.in(Expressions.ref("s", 35), "abc", "xyz").bind(schema.asStruct()), + Expressions.notNull(Expressions.ref("a", 34)).bind(schema.asStruct()) }; for (Expression expression : expressions) { @@ -196,6 +206,15 @@ public class TestExpressionSerialization { NamedReference rref = (NamedReference) right; return lref.name().equals(rref.name()); + } else if (left instanceof ResolvedReference) { + if (!(right instanceof ResolvedReference)) { + return false; + } + + ResolvedReference lref = (ResolvedReference) left; + ResolvedReference rref = (ResolvedReference) right; + + return lref.fieldId() == rref.fieldId() && lref.name().equals(rref.name()); } else if (left instanceof BoundReference) { if (!(right instanceof BoundReference)) { return false; diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java b/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java new file mode 100644 index 0000000000..54c5694138 --- /dev/null +++ 
b/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.expressions; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.Schema; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestResolvedReference { + private static final Schema SCHEMA = + new Schema( + Types.NestedField.optional(34, "a", Types.IntegerType.get()), + Types.NestedField.required(35, "s", Types.StringType.get())); + + @Test + public void testResolvedReferenceEquality() { + ResolvedReference<Integer> ref1 = new ResolvedReference<>("a", 34); + ResolvedReference<Integer> ref2 = new ResolvedReference<>("a", 34); + ResolvedReference<Integer> ref3 = new ResolvedReference<>("b", 34); + ResolvedReference<Integer> ref4 = new ResolvedReference<>("a", 35); + + // Equal references + assertThat(ref1).isEqualTo(ref2); + assertThat(ref1.hashCode()).isEqualTo(ref2.hashCode()); + + // Different names, same fieldId + 
assertThat(ref1).isNotEqualTo(ref3); + + // Same name, different fieldId + assertThat(ref1).isNotEqualTo(ref4); + } + + @Test + public void testResolvedReferenceBind() { + ResolvedReference<Integer> ref = new ResolvedReference<>("a", 34); + BoundTerm<Integer> bound = ref.bind(SCHEMA.asStruct(), true); + + assertThat(bound).isInstanceOf(BoundReference.class); + BoundReference<Integer> boundRef = (BoundReference<Integer>) bound; + assertThat(boundRef.fieldId()).isEqualTo(34); + assertThat(boundRef.name()).isEqualTo("a"); + assertThat(boundRef.type()).isEqualTo(Types.IntegerType.get()); + } + + @Test + public void testResolvedReferenceBindIgnoresCaseSensitivity() { + ResolvedReference<Integer> ref = new ResolvedReference<>("A", 34); + + // Should work regardless of case sensitivity since we use fieldId + BoundTerm<Integer> bound1 = ref.bind(SCHEMA.asStruct(), true); + BoundTerm<Integer> bound2 = ref.bind(SCHEMA.asStruct(), false); + + assertThat(bound1).isInstanceOf(BoundReference.class); + assertThat(bound2).isInstanceOf(BoundReference.class); + assertThat(((BoundReference<Integer>) bound1).fieldId()).isEqualTo(34); + assertThat(((BoundReference<Integer>) bound2).fieldId()).isEqualTo(34); + } + + @Test + public void testResolvedReferenceBindWithInvalidFieldId() { + ResolvedReference<Integer> ref = new ResolvedReference<>("invalid", 999); + + assertThatThrownBy(() -> ref.bind(SCHEMA.asStruct(), true)) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Cannot find field 'invalid' in struct"); + } + + @Test + public void testResolvedReferenceRef() { + ResolvedReference<Integer> ref = new ResolvedReference<>("a", 34); + NamedReference<?> namedRef = ref.ref(); + + assertThat(namedRef.name()).isEqualTo("a"); + } + + @Test + public void testResolvedReferenceToString() { + ResolvedReference<Integer> ref = new ResolvedReference<>("a", 34); + + assertThat(ref.toString()).isEqualTo("ref(name=\"a\", fieldId=\"34\")"); + } + + @Test + public void 
testResolvedReferenceExpressionIntegration() { + // Test that ResolvedReference works in expression predicates + Expression expr = Expressions.equal(Expressions.ref("a", 34), 5); + assertThat(expr).isInstanceOf(UnboundPredicate.class); + + UnboundPredicate<?> predicate = (UnboundPredicate<?>) expr; + assertThat(predicate.term()).isInstanceOf(ResolvedReference.class); + + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) predicate.term(); + assertThat(resolvedRef.name()).isEqualTo("a"); + assertThat(resolvedRef.fieldId()).isEqualTo(34); + } + + @Test + public void testResolvedReferenceUnbind() { + // Test that unbinding a bound reference returns a NamedReference for compatibility + Expression expr = Expressions.equal(Expressions.ref("a", 34), 5); + Expression boundExpr = Binder.bind(SCHEMA.asStruct(), expr, true); + + assertThat(boundExpr).isInstanceOf(BoundPredicate.class); + BoundPredicate<?> boundPred = (BoundPredicate<?>) boundExpr; + + UnboundTerm<?> unbound = ExpressionUtil.unbind(boundPred.term()); + assertThat(unbound).isInstanceOf(NamedReference.class); + + NamedReference<?> namedRef = (NamedReference<?>) unbound; + assertThat(namedRef.name()).isEqualTo("a"); + } +} \ No newline at end of file diff --git a/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java b/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java index 9bb5b7d05f..396520f997 100644 --- a/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java +++ b/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java @@ -62,16 +62,27 @@ public class ExpressionParser { return JsonUtil.generate(gen -> toJson(expression, gen), pretty); } + public static String toJson(Expression expression, boolean pretty, boolean includeFieldIds) { + Preconditions.checkArgument(expression != null, "Invalid expression: null"); + return JsonUtil.generate(gen -> toJson(expression, gen, includeFieldIds), pretty); + } + public static void 
toJson(Expression expression, JsonGenerator gen) { - ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen)); + ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, false)); + } + + public static void toJson(Expression expression, JsonGenerator gen, boolean includeFieldIds) { + ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, includeFieldIds)); } private static class JsonGeneratorVisitor extends ExpressionVisitors.CustomOrderExpressionVisitor<Void> { private final JsonGenerator gen; + private final boolean includeFieldIds; - private JsonGeneratorVisitor(JsonGenerator gen) { + private JsonGeneratorVisitor(JsonGenerator gen, boolean includeFieldIds) { this.gen = gen; + this.includeFieldIds = includeFieldIds; } /** @@ -238,9 +249,29 @@ public class ExpressionParser { UnboundTransform<?, ?> transform = (UnboundTransform<?, ?>) term; transform(transform.transform().toString(), transform.ref().name()); return; + } else if (term instanceof ResolvedTransform) { + ResolvedTransform<?, ?> transform = (ResolvedTransform<?, ?>) term; + if (includeFieldIds) { + transformWithFieldId(transform.transform().toString(), transform.resolvedRef().name(), transform.resolvedRef().fieldId()); + } else { + transform(transform.transform().toString(), transform.ref().name()); + } + return; } else if (term instanceof BoundTransform) { BoundTransform<?, ?> transform = (BoundTransform<?, ?>) term; - transform(transform.transform().toString(), transform.ref().name()); + if (includeFieldIds) { + transformWithFieldId(transform.transform().toString(), transform.ref().name(), transform.ref().fieldId()); + } else { + transform(transform.transform().toString(), transform.ref().name()); + } + return; + } else if (term instanceof BoundReference) { + BoundReference<?> ref = (BoundReference<?>) term; + if (includeFieldIds) { + referenceWithFieldId(ref.name(), ref.fieldId()); + } else { + gen.writeString(ref.name()); + } return; } else if (term instanceof 
Reference) { gen.writeString(((Reference<?>) term).name()); @@ -257,6 +288,24 @@ public class ExpressionParser { gen.writeStringField(TERM, name); gen.writeEndObject(); } + + private void transformWithFieldId(String transform, String name, int fieldId) throws IOException { + gen.writeStartObject(); + gen.writeStringField(TYPE, TRANSFORM); + gen.writeStringField(TRANSFORM, transform); + // Write term as a ResolvedReference object + gen.writeFieldName(TERM); + referenceWithFieldId(name, fieldId); + gen.writeEndObject(); + } + + private void referenceWithFieldId(String name, int fieldId) throws IOException { + gen.writeStartObject(); + gen.writeStringField("type", "ref"); + gen.writeStringField("name", name); + gen.writeNumberField("fieldId", fieldId); + gen.writeEndObject(); + } } public static Expression fromJson(String json) { diff --git a/core/src/test/java/org/apache/iceberg/expressions/TestEnhancedExpressionParserWithFieldIds.java b/core/src/test/java/org/apache/iceberg/expressions/TestEnhancedExpressionParserWithFieldIds.java new file mode 100644 index 0000000000..9d5817e82d --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/expressions/TestEnhancedExpressionParserWithFieldIds.java @@ -0,0 +1,433 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.expressions; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.function.Supplier; +import org.apache.iceberg.Schema; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.JsonUtil; +import org.junit.jupiter.api.Test; + +/** + * Test demonstrating how ExpressionParser could be enhanced to support field IDs + * for ResolvedReference serialization/deserialization. + * + * This is a proof of concept showing the enhanced JSON format that would preserve + * field ID information during serialization round-trips. 
+ */ +public class TestEnhancedExpressionParserWithFieldIds { + + private static final Types.StructType STRUCT_TYPE = + Types.StructType.of( + required(100, "id", Types.LongType.get()), + optional(101, "data", Types.StringType.get()), + required(102, "active", Types.BooleanType.get())); + + private static final Schema SCHEMA = new Schema(STRUCT_TYPE.fields()); + + // Enhanced JSON format constants for field ID support + private static final String FIELD_ID = "field-id"; + private static final String REFERENCE_WITH_ID = "resolved-reference"; + + @Test + public void testEnhancedJsonFormatWithFieldIds() { + // Test the enhanced JSON format that would support field IDs + Expression resolvedExpr = Expressions.equal(Expressions.ref("data", 101), "test"); + + // Generate enhanced JSON manually to show the concept + String enhancedJson = generateEnhancedJson(resolvedExpr); + + // Expected enhanced JSON structure with field ID + String expectedJson = "{\n" + + " \"type\" : \"eq\",\n" + + " \"term\" : {\n" + + " \"type\" : \"resolved-reference\",\n" + + " \"name\" : \"data\",\n" + + " \"field-id\" : 101\n" + + " },\n" + + " \"value\" : \"test\"\n" + + "}"; + + assertThat(enhancedJson).isEqualTo(expectedJson); + } + + @Test + public void testEnhancedParsingWithFieldIds() { + // Test parsing enhanced JSON that includes field IDs + String enhancedJson = "{\n" + + " \"type\" : \"eq\",\n" + + " \"term\" : {\n" + + " \"type\" : \"resolved-reference\",\n" + + " \"name\" : \"data\",\n" + + " \"field-id\" : 101\n" + + " },\n" + + " \"value\" : \"test\"\n" + + "}"; + + // Parse using enhanced parser (concept) + Expression parsed = parseEnhancedJson(enhancedJson); + + assertThat(parsed).isInstanceOf(UnboundPredicate.class); + UnboundPredicate<?> predicate = (UnboundPredicate<?>) parsed; + assertThat(predicate.term()).isInstanceOf(ResolvedReference.class); + + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) predicate.term(); + assertThat(resolvedRef.name()).isEqualTo("data"); 
+ assertThat(resolvedRef.fieldId()).isEqualTo(101); + } + + @Test + public void testBackwardCompatibilityWithExistingFormat() { + // Test that enhanced parser can handle existing JSON format without field IDs + String standardJson = "{\n" + + " \"type\" : \"eq\",\n" + + " \"term\" : \"data\",\n" + + " \"value\" : \"test\"\n" + + "}"; + + Expression parsed = parseEnhancedJson(standardJson); + + assertThat(parsed).isInstanceOf(UnboundPredicate.class); + UnboundPredicate<?> predicate = (UnboundPredicate<?>) parsed; + assertThat(predicate.term()).isInstanceOf(NamedReference.class); + + NamedReference<?> namedRef = (NamedReference<?>) predicate.term(); + assertThat(namedRef.name()).isEqualTo("data"); + } + + @Test + public void testComplexExpressionWithMixedReferences() { + // Test complex expression with both ResolvedReference and NamedReference + Expression mixedExpr = Expressions.and( + Expressions.equal(Expressions.ref("data", 101), "test"), // ResolvedReference + Expressions.isNull("active")); // NamedReference + + String enhancedJson = generateEnhancedJson(mixedExpr); + + // Should contain both reference types in JSON + assertThat(enhancedJson).contains("\"resolved-reference\""); + assertThat(enhancedJson).contains("\"field-id\" : 101"); + assertThat(enhancedJson).contains("\"active\""); + + // Parse back and verify + Expression parsed = parseEnhancedJson(enhancedJson); + assertThat(parsed).isInstanceOf(And.class); + + And andExpr = (And) parsed; + + // Left side should be ResolvedReference + UnboundPredicate<?> leftPred = (UnboundPredicate<?>) andExpr.left(); + assertThat(leftPred.term()).isInstanceOf(ResolvedReference.class); + + // Right side should be NamedReference + UnboundPredicate<?> rightPred = (UnboundPredicate<?>) andExpr.right(); + assertThat(rightPred.term()).isInstanceOf(NamedReference.class); + } + + @Test + public void testFieldIdPreservationThroughRoundTrip() { + // Test that field IDs are preserved through complete round-trip + Expression 
original = Expressions.and( + Expressions.greaterThan(Expressions.ref("id", 100), 50L), + Expressions.equal(Expressions.ref("data", 101), "test")); + + // Generate enhanced JSON + String json = generateEnhancedJson(original); + + // Parse back + Expression parsed = parseEnhancedJson(json); + + // Verify structure is preserved + assertThat(parsed).isInstanceOf(And.class); + And andExpr = (And) parsed; + + // Check left predicate (id > 50) + UnboundPredicate<?> leftPred = (UnboundPredicate<?>) andExpr.left(); + ResolvedReference<?> leftRef = (ResolvedReference<?>) leftPred.term(); + assertThat(leftRef.name()).isEqualTo("id"); + assertThat(leftRef.fieldId()).isEqualTo(100); + + // Check right predicate (data = "test") + UnboundPredicate<?> rightPred = (UnboundPredicate<?>) andExpr.right(); + ResolvedReference<?> rightRef = (ResolvedReference<?>) rightPred.term(); + assertThat(rightRef.name()).isEqualTo("data"); + assertThat(rightRef.fieldId()).isEqualTo(101); + } + + // Helper methods to demonstrate enhanced JSON generation and parsing + + private String generateEnhancedJson(Expression expr) { + return JsonUtil.generate(gen -> { + try { + generateEnhanced(expr, gen); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, true); + } + + private void generateEnhanced(Expression expr, JsonGenerator gen) throws IOException { + ExpressionVisitors.visit(expr, new EnhancedJsonVisitor(gen)); + } + + private Expression parseEnhancedJson(String json) { + try { + JsonNode node = JsonUtil.mapper().readTree(json); + return parseEnhancedExpression(node); + } catch (Exception e) { + throw new RuntimeException("Failed to parse JSON", e); + } + } + + // Enhanced JSON visitor that supports field IDs + private static class EnhancedJsonVisitor extends ExpressionVisitors.CustomOrderExpressionVisitor<Void> { + private final JsonGenerator gen; + + EnhancedJsonVisitor(JsonGenerator gen) { + this.gen = gen; + } + + private void toJson(Supplier<Void> child) { + child.get(); 
+ } + + private Void generate(Runnable task) { + try { + task.run(); + return null; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public Void alwaysTrue() { + return generate(() -> { + try { + gen.writeBoolean(true); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + @Override + public Void alwaysFalse() { + return generate(() -> { + try { + gen.writeBoolean(false); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + @Override + public Void not(Supplier<Void> result) { + return generate(() -> { + try { + gen.writeStartObject(); + gen.writeStringField("type", "not"); + gen.writeFieldName("child"); + toJson(result); + gen.writeEndObject(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + @Override + public Void and(Supplier<Void> leftResult, Supplier<Void> rightResult) { + return generate(() -> { + try { + gen.writeStartObject(); + gen.writeStringField("type", "and"); + gen.writeFieldName("left"); + toJson(leftResult); + gen.writeFieldName("right"); + toJson(rightResult); + gen.writeEndObject(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + @Override + public Void or(Supplier<Void> leftResult, Supplier<Void> rightResult) { + return generate(() -> { + try { + gen.writeStartObject(); + gen.writeStringField("type", "or"); + gen.writeFieldName("left"); + toJson(leftResult); + gen.writeFieldName("right"); + toJson(rightResult); + gen.writeEndObject(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + @Override + public <T> Void predicate(UnboundPredicate<T> pred) { + return generate(() -> { + try { + gen.writeStartObject(); + gen.writeStringField("type", pred.op().toString().toLowerCase().replace("_", "-")); + gen.writeFieldName("term"); + writeTerm(pred.term()); + + if (pred.literals() != null && !pred.literals().isEmpty()) { + if (pred.literals().size() == 1) { + 
gen.writeFieldName("value"); + writeLiteral(pred.literals().get(0)); + } else { + gen.writeFieldName("values"); + gen.writeStartArray(); + for (Literal<T> literal : pred.literals()) { + writeLiteral(literal); + } + gen.writeEndArray(); + } + } + + gen.writeEndObject(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + private void writeTerm(UnboundTerm<?> term) throws IOException { + if (term instanceof ResolvedReference) { + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) term; + gen.writeStartObject(); + gen.writeStringField("type", REFERENCE_WITH_ID); + gen.writeStringField("name", resolvedRef.name()); + gen.writeNumberField(FIELD_ID, resolvedRef.fieldId()); + gen.writeEndObject(); + } else if (term instanceof NamedReference) { + gen.writeString(((NamedReference<?>) term).name()); + } else { + throw new UnsupportedOperationException("Unsupported term type: " + term.getClass()); + } + } + + private void writeLiteral(Literal<?> literal) throws IOException { + Object value = literal.value(); + if (value instanceof String) { + gen.writeString((String) value); + } else if (value instanceof Long) { + gen.writeNumber((Long) value); + } else if (value instanceof Integer) { + gen.writeNumber((Integer) value); + } else if (value instanceof Boolean) { + gen.writeBoolean((Boolean) value); + } else if (value instanceof Double) { + gen.writeNumber((Double) value); + } else if (value instanceof Float) { + gen.writeNumber((Float) value); + } else { + gen.writeString(value.toString()); + } + } + } + + // Enhanced expression parser that supports field IDs + private Expression parseEnhancedExpression(JsonNode node) { + if (node.isBoolean()) { + return node.asBoolean() ? 
Expressions.alwaysTrue() : Expressions.alwaysFalse(); + } + + if (!node.isObject()) { + throw new IllegalArgumentException("Cannot parse expression from: " + node); + } + + String type = JsonUtil.getString("type", node); + switch (type) { + case "and": + return Expressions.and( + parseEnhancedExpression(JsonUtil.get("left", node)), + parseEnhancedExpression(JsonUtil.get("right", node))); + case "or": + return Expressions.or( + parseEnhancedExpression(JsonUtil.get("left", node)), + parseEnhancedExpression(JsonUtil.get("right", node))); + case "not": + return Expressions.not(parseEnhancedExpression(JsonUtil.get("child", node))); + case "eq": + return Expressions.equal( + parseEnhancedTerm(JsonUtil.get("term", node)), + parseValue(JsonUtil.get("value", node))); + case "gt": + return Expressions.greaterThan( + parseEnhancedTerm(JsonUtil.get("term", node)), + parseValue(JsonUtil.get("value", node))); + case "is-null": + return Expressions.isNull(parseEnhancedTerm(JsonUtil.get("term", node))); + default: + throw new IllegalArgumentException("Unknown expression type: " + type); + } + } + + @SuppressWarnings("unchecked") + private <T> UnboundTerm<T> parseEnhancedTerm(JsonNode node) { + if (node.isTextual()) { + return Expressions.ref(node.asText()); + } + + if (node.isObject()) { + String type = JsonUtil.getString("type", node); + if (REFERENCE_WITH_ID.equals(type)) { + String name = JsonUtil.getString("name", node); + int fieldId = JsonUtil.getInt(FIELD_ID, node); + return Expressions.ref(name, fieldId); + } + } + + throw new IllegalArgumentException("Cannot parse term from: " + node); + } + + private Object parseValue(JsonNode node) { + if (node.isTextual()) { + return node.asText(); + } else if (node.isLong()) { + return node.asLong(); + } else if (node.isInt()) { + return node.asInt(); + } else if (node.isBoolean()) { + return node.asBoolean(); + } else if (node.isDouble()) { + return node.asDouble(); + } + return node.asText(); + } +} \ No newline at end of file diff 
--git a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java new file mode 100644 index 0000000000..97981a6471 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java @@ -0,0 +1,863 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.expressions; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import java.math.BigDecimal; +import java.util.UUID; +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestExpressionParserWithResolvedReference { + + private static final Types.StructType STRUCT_TYPE = + Types.StructType.of( + required(100, "id", Types.LongType.get()), + optional(101, "data", Types.StringType.get()), + required(102, "b", Types.BooleanType.get()), + optional(103, "i", Types.IntegerType.get()), + required(104, "l", Types.LongType.get()), + optional(105, "f", Types.FloatType.get()), + required(106, "d", Types.DoubleType.get()), + optional(107, "date", Types.DateType.get()), + required(108, "ts", Types.TimestampType.withoutZone()), + required(110, "s", Types.StringType.get()), + required(111, "uuid", Types.UUIDType.get()), + required(112, "fixed", Types.FixedType.ofLength(7)), + optional(113, "bytes", Types.BinaryType.get()), + required(114, "dec_9_0", Types.DecimalType.of(9, 0)), + required(115, "dec_11_2", Types.DecimalType.of(11, 2))); + + private static final Schema SCHEMA = new Schema(STRUCT_TYPE.fields()); + + @Test + public void testResolvedReferenceExpressionSerialization() { + // Create expressions using ResolvedReference + Expression[] resolvedExpressions = new Expression[] { + Expressions.equal(Expressions.ref("id", 100), 42L), + Expressions.lessThan(Expressions.ref("data", 101), "test"), + Expressions.greaterThanOrEqual(Expressions.ref("i", 103), 10), + Expressions.isNull(Expressions.ref("f", 105)), + Expressions.notNull(Expressions.ref("date", 107)), + Expressions.startsWith(Expressions.ref("s", 110), "prefix"), + Expressions.in(Expressions.ref("l", 104), 1L, 2L, 3L), + Expressions.notIn(Expressions.ref("b", 102), true, 
false), + Expressions.isNaN(Expressions.ref("d", 106)), + Expressions.notNaN(Expressions.ref("f", 105)) + }; + + for (Expression expr : resolvedExpressions) { + // Verify the expression uses ResolvedReference + assertThat(expr).isInstanceOf(UnboundPredicate.class); + UnboundPredicate<?> predicate = (UnboundPredicate<?>) expr; + assertThat(predicate.term()).isInstanceOf(ResolvedReference.class); + + // Test JSON serialization + String json = ExpressionParser.toJson(expr, true); + assertThat(json).isNotNull(); + assertThat(json).contains("\"type\""); + + // The JSON must contain the field name; the two-argument toJson is used and no field ID is asserted here + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) predicate.term(); + assertThat(json).contains(resolvedRef.name()); + } + } + + @Test + public void testResolvedReferenceRoundTripCompatibility() { + // Test that ResolvedReference expressions can be serialized and parsed back + Expression resolvedExpr = Expressions.equal(Expressions.ref("id", 100), 42L); + Expression namedExpr = Expressions.equal(Expressions.ref("id"), 42L); + + // Both must produce identical JSON from the two-argument toJson, i.e. the field ID is not written + String resolvedJson = ExpressionParser.toJson(resolvedExpr, true); + String namedJson = ExpressionParser.toJson(namedExpr, true); + assertThat(resolvedJson).isEqualTo(namedJson); + + // Parse back and verify equivalence + Expression parsedFromResolved = ExpressionParser.fromJson(resolvedJson, SCHEMA); + Expression parsedFromNamed = ExpressionParser.fromJson(namedJson, SCHEMA); + + // Both parsed expressions should be equivalent + assertThat(ExpressionUtil.equivalent(parsedFromResolved, parsedFromNamed, STRUCT_TYPE, true)) + .isTrue(); + + // The parsed expression should be equivalent to the original named reference expression + assertThat(ExpressionUtil.equivalent(namedExpr, parsedFromResolved, STRUCT_TYPE, true)) + .isTrue(); + } + + @Test + public void testResolvedReferenceComplexExpressions() { + // Test complex
expressions with ResolvedReference + Expression complexExpr = Expressions.and( + Expressions.or( + Expressions.equal(Expressions.ref("data", 101), "test"), + Expressions.isNull(Expressions.ref("data", 101))), + Expressions.greaterThanOrEqual(Expressions.ref("id", 100), 100L)); + + // Serialize to JSON + String json = ExpressionParser.toJson(complexExpr, true); + assertThat(json).contains("\"type\" : \"and\""); + assertThat(json).contains("\"type\" : \"or\""); + assertThat(json).contains("\"data\""); + assertThat(json).contains("\"id\""); + + // Parse back + Expression parsed = ExpressionParser.fromJson(json, SCHEMA); + + // Create equivalent expression with NamedReference for comparison + Expression namedEquivalent = Expressions.and( + Expressions.or( + Expressions.equal("data", "test"), + Expressions.isNull("data")), + Expressions.greaterThanOrEqual("id", 100L)); + + // Should be equivalent + assertThat(ExpressionUtil.equivalent(namedEquivalent, parsed, STRUCT_TYPE, true)) + .isTrue(); + } + + @Test + public void testResolvedReferenceTransformExpressions() { + // Test transform expressions - using NamedReference for transforms since + // ResolvedReference needs proper type parameters for transform methods + Expression dayTransform = Expressions.equal( + Expressions.day("date"), "2023-01-15"); + Expression bucketTransform = Expressions.equal( + Expressions.bucket("id", 10), 5); + + // Test serialization + String dayJson = ExpressionParser.toJson(dayTransform, true); + String bucketJson = ExpressionParser.toJson(bucketTransform, true); + + assertThat(dayJson).contains("\"transform\" : \"day\""); + assertThat(dayJson).contains("\"term\" : \"date\""); + assertThat(bucketJson).contains("\"transform\" : \"bucket[10]\""); + assertThat(bucketJson).contains("\"term\" : \"id\""); + + // Test round-trip + Expression parsedDay = ExpressionParser.fromJson(dayJson, SCHEMA); + Expression parsedBucket = ExpressionParser.fromJson(bucketJson, SCHEMA); + + // Should maintain 
equivalence after round-trip + assertThat(ExpressionUtil.equivalent(dayTransform, parsedDay, STRUCT_TYPE, true)) + .isTrue(); + assertThat(ExpressionUtil.equivalent(bucketTransform, parsedBucket, STRUCT_TYPE, true)) + .isTrue(); + } + + @Test + public void testResolvedReferenceBindingAfterParsing() { + // Test that expressions with ResolvedReference bind correctly after parsing + Expression original = Expressions.equal(Expressions.ref("id", 100), 42L); + + // Serialize and parse + String json = ExpressionParser.toJson(original, true); + Expression parsed = ExpressionParser.fromJson(json, SCHEMA); + + // Both should bind successfully + Expression originalBound = Binder.bind(STRUCT_TYPE, original, true); + Expression parsedBound = Binder.bind(STRUCT_TYPE, parsed, true); + + // Both bound expressions should be identical + assertThat(originalBound).isInstanceOf(BoundPredicate.class); + assertThat(parsedBound).isInstanceOf(BoundPredicate.class); + + BoundPredicate<?> originalBoundPred = (BoundPredicate<?>) originalBound; + BoundPredicate<?> parsedBoundPred = (BoundPredicate<?>) parsedBound; + + // Should reference the same field + assertThat(originalBoundPred.ref().fieldId()).isEqualTo(100); + assertThat(parsedBoundPred.ref().fieldId()).isEqualTo(100); + assertThat(originalBoundPred.ref().name()).isEqualTo("id"); + assertThat(parsedBoundPred.ref().name()).isEqualTo("id"); + } + + @Test + public void testResolvedReferenceWithDifferentTypes() { + // Test ResolvedReference with various data types + Expression[] typedExpressions = new Expression[] { + Expressions.equal(Expressions.ref("b", 102), true), + Expressions.equal(Expressions.ref("i", 103), 42), + Expressions.equal(Expressions.ref("l", 104), 42L), + Expressions.equal(Expressions.ref("f", 105), 3.14f), + Expressions.equal(Expressions.ref("d", 106), 3.14159), + Expressions.equal(Expressions.ref("s", 110), "test string"), + Expressions.equal(Expressions.ref("uuid", 111), UUID.randomUUID()), + 
Expressions.equal(Expressions.ref("dec_11_2", 115), new BigDecimal("123.45")) + }; + + for (Expression expr : typedExpressions) { + // Test serialization doesn't break with different types + String json = ExpressionParser.toJson(expr, true); + assertThat(json).isNotNull(); + + // Test parsing back + Expression parsed = ExpressionParser.fromJson(json, SCHEMA); + assertThat(parsed).isNotNull(); + + // Test binding + Expression bound = Binder.bind(STRUCT_TYPE, parsed, true); + assertThat(bound).isInstanceOf(BoundPredicate.class); + } + } + + @Test + public void testResolvedReferenceJsonStructure() { + // Test the exact JSON structure produced by ResolvedReference + Expression expr = Expressions.equal(Expressions.ref("data", 101), "test"); + String json = ExpressionParser.toJson(expr, true); + + // The two-argument toJson must write the term as a plain name, exactly like a named reference (no field ID) + String expectedStructure = "{\n" + + " \"type\" : \"eq\",\n" + + " \"term\" : \"data\",\n" + + " \"value\" : \"test\"\n" + + "}"; + + assertThat(json).isEqualTo(expectedStructure); + + // Verify it parses back correctly; without a schema the term comes back as a NamedReference + Expression parsed = ExpressionParser.fromJson(json); + assertThat(parsed).isInstanceOf(UnboundPredicate.class); + + UnboundPredicate<?> predicate = (UnboundPredicate<?>) parsed; + assertThat(predicate.term()).isInstanceOf(NamedReference.class); + assertThat(predicate.term().ref().name()).isEqualTo("data"); + } + + @Test + public void testResolvedReferenceEquivalenceAfterSerialization() { + // Test that ResolvedReference expressions maintain equivalence after serialization + Expression resolvedExpr = Expressions.and( + Expressions.greaterThan(Expressions.ref("id", 100), 50L), + Expressions.lessThan(Expressions.ref("id", 100), 200L)); + + Expression namedExpr = Expressions.and( + Expressions.greaterThan("id", 50L), + Expressions.lessThan("id", 200L)); + + // Serialize both + String resolvedJson = ExpressionParser.toJson(resolvedExpr, true); + String namedJson =
ExpressionParser.toJson(namedExpr, true); + + // Should produce identical JSON + assertThat(resolvedJson).isEqualTo(namedJson); + + // Parse both back + Expression parsedResolved = ExpressionParser.fromJson(resolvedJson, SCHEMA); + Expression parsedNamed = ExpressionParser.fromJson(namedJson, SCHEMA); + + // All should be equivalent + assertThat(ExpressionUtil.equivalent(resolvedExpr, namedExpr, STRUCT_TYPE, true)) + .isTrue(); + assertThat(ExpressionUtil.equivalent(parsedResolved, parsedNamed, STRUCT_TYPE, true)) + .isTrue(); + assertThat(ExpressionUtil.equivalent(resolvedExpr, parsedResolved, STRUCT_TYPE, true)) + .isTrue(); + } + + @Test + public void testResolvedReferenceTransformExpressionEquivalence() { + // Test expressions that reference transforms where the transform terms are equivalent to resolved references + // Since UnboundTransform only accepts NamedReference, we test equivalence through binding/unbinding + + // Create transform expressions using NamedReference (current approach) + Expression dayTransformNamed = Expressions.equal(Expressions.day("date"), "2023-01-15"); + Expression bucketTransformNamed = Expressions.equal(Expressions.bucket("id", 10), 5); + Expression truncateTransformNamed = Expressions.equal(Expressions.truncate("data", 4), "test"); + + // Create equivalent expressions using ResolvedReference for the predicate terms + Expression dayWithResolvedRef = Expressions.equal(Expressions.ref("date", 107), "2023-01-15"); + Expression bucketWithResolvedRef = Expressions.equal(Expressions.ref("id", 100), 5L); + Expression truncateWithResolvedRef = Expressions.equal(Expressions.ref("data", 101), "test"); + + // Bind all expressions + Expression boundDayTransform = Binder.bind(STRUCT_TYPE, dayTransformNamed, true); + Expression boundBucketTransform = Binder.bind(STRUCT_TYPE, bucketTransformNamed, true); + Expression boundTruncateTransform = Binder.bind(STRUCT_TYPE, truncateTransformNamed, true); + + Expression boundDayResolved = 
Binder.bind(STRUCT_TYPE, dayWithResolvedRef, true); + Expression boundBucketResolved = Binder.bind(STRUCT_TYPE, bucketWithResolvedRef, true); + Expression boundTruncateResolved = Binder.bind(STRUCT_TYPE, truncateWithResolvedRef, true); + + // Verify all expressions bound successfully + assertThat(boundDayTransform).isInstanceOf(BoundPredicate.class); + assertThat(boundBucketTransform).isInstanceOf(BoundPredicate.class); + assertThat(boundTruncateTransform).isInstanceOf(BoundPredicate.class); + assertThat(boundDayResolved).isInstanceOf(BoundPredicate.class); + assertThat(boundBucketResolved).isInstanceOf(BoundPredicate.class); + assertThat(boundTruncateResolved).isInstanceOf(BoundPredicate.class); + + // Test transform expressions in complex expressions with resolved references + Expression complexTransformExpr = Expressions.and( + Expressions.equal(Expressions.day("date"), "2023-01-15"), + Expressions.equal(Expressions.ref("id", 100), 42L)); + + Expression boundComplexExpr = Binder.bind(STRUCT_TYPE, complexTransformExpr, true); + assertThat(boundComplexExpr).isInstanceOf(And.class); + + // Verify serialization works for transform expressions that coexist with resolved references + String complexJson = ExpressionParser.toJson(complexTransformExpr, true); + assertThat(complexJson).contains("\"transform\" : \"day\""); + assertThat(complexJson).contains("\"term\" : \"date\""); + assertThat(complexJson).contains("\"term\" : \"id\""); + + // Verify round-trip maintains correctness + Expression parsedComplex = ExpressionParser.fromJson(complexJson, SCHEMA); + Expression boundParsedComplex = Binder.bind(STRUCT_TYPE, parsedComplex, true); + + // Both bound expressions should reference the same fields + assertThat(boundComplexExpr.toString()).isEqualTo(boundParsedComplex.toString()); + } + + @Test + public void testResolvedReferenceInComplexTransformExpressions() { + // Test complex expressions that combine transforms with resolved references + Expression complexExpr = 
Expressions.or( + Expressions.and( + Expressions.equal(Expressions.bucket("id", 8), 3), + Expressions.equal(Expressions.ref("data", 101), "test")), + Expressions.and( + Expressions.equal(Expressions.day("date"), "2023-01-15"), + Expressions.isNull(Expressions.ref("f", 105)))); + + // Test serialization + String json = ExpressionParser.toJson(complexExpr, true); + assertThat(json).contains("\"transform\" : \"bucket[8]\""); + assertThat(json).contains("\"transform\" : \"day\""); + assertThat(json).contains("\"term\" : \"id\""); + assertThat(json).contains("\"term\" : \"date\""); + assertThat(json).contains("\"term\" : \"data\""); + assertThat(json).contains("\"term\" : \"f\""); + + // Test that parsing back maintains structure + Expression parsed = ExpressionParser.fromJson(json, SCHEMA); + assertThat(parsed).isInstanceOf(Or.class); + + // Test binding works correctly + Expression bound = Binder.bind(STRUCT_TYPE, parsed, true); + assertThat(bound).isInstanceOf(Or.class); + + // Verify equivalence with original + assertThat(ExpressionUtil.equivalent(complexExpr, parsed, STRUCT_TYPE, true)).isTrue(); + + // Test that mixed transform and resolved reference expressions bind to same fields + Expression originalBound = Binder.bind(STRUCT_TYPE, complexExpr, true); + + // Both bound expressions should be structurally equivalent + assertThat(originalBound.toString()).isEqualTo(bound.toString()); + } + + @Test + public void testTransformExpressionsWithResolvedReference() { + // Test expressions that reference transforms which in turn reference resolved references + // Create transforms using the new ResolvedReference-based factory methods + Expression bucketExpr = Expressions.equal( + Expressions.bucket(Expressions.ref("id", 100), 8), 3); + Expression dayExpr = Expressions.equal( + Expressions.day(Expressions.ref("date", 107)), "2023-01-15"); + Expression hourExpr = Expressions.equal( + Expressions.hour(Expressions.ref("ts", 108)), 10); + Expression truncateExpr = 
Expressions.equal( + Expressions.truncate(Expressions.ref("data", 101), 4), "test"); + + // Verify the expressions are created correctly + assertThat(bucketExpr).isInstanceOf(UnboundPredicate.class); + assertThat(dayExpr).isInstanceOf(UnboundPredicate.class); + assertThat(hourExpr).isInstanceOf(UnboundPredicate.class); + assertThat(truncateExpr).isInstanceOf(UnboundPredicate.class); + + // Verify the terms are ResolvedTransform instances + UnboundPredicate<?> bucketPred = (UnboundPredicate<?>) bucketExpr; + UnboundPredicate<?> dayPred = (UnboundPredicate<?>) dayExpr; + UnboundPredicate<?> hourPred = (UnboundPredicate<?>) hourExpr; + UnboundPredicate<?> truncatePred = (UnboundPredicate<?>) truncateExpr; + + assertThat(bucketPred.term()).isInstanceOf(ResolvedTransform.class); + assertThat(dayPred.term()).isInstanceOf(ResolvedTransform.class); + assertThat(hourPred.term()).isInstanceOf(ResolvedTransform.class); + assertThat(truncatePred.term()).isInstanceOf(ResolvedTransform.class); + + // Verify that ResolvedTransform preserves the ResolvedReference with field IDs + ResolvedTransform<?, ?> bucketTransform = (ResolvedTransform<?, ?>) bucketPred.term(); + ResolvedTransform<?, ?> dayTransform = (ResolvedTransform<?, ?>) dayPred.term(); + ResolvedTransform<?, ?> hourTransform = (ResolvedTransform<?, ?>) hourPred.term(); + ResolvedTransform<?, ?> truncateTransform = (ResolvedTransform<?, ?>) truncatePred.term(); + + assertThat(bucketTransform.resolvedRef().fieldId()).isEqualTo(100); + assertThat(bucketTransform.resolvedRef().name()).isEqualTo("id"); + assertThat(dayTransform.resolvedRef().fieldId()).isEqualTo(107); + assertThat(dayTransform.resolvedRef().name()).isEqualTo("date"); + assertThat(hourTransform.resolvedRef().fieldId()).isEqualTo(108); + assertThat(hourTransform.resolvedRef().name()).isEqualTo("ts"); + assertThat(truncateTransform.resolvedRef().fieldId()).isEqualTo(101); + assertThat(truncateTransform.resolvedRef().name()).isEqualTo("data"); + + // Test 
serialization + String bucketJson = ExpressionParser.toJson(bucketExpr, true); + String dayJson = ExpressionParser.toJson(dayExpr, true); + String hourJson = ExpressionParser.toJson(hourExpr, true); + String truncateJson = ExpressionParser.toJson(truncateExpr, true); + + // Verify JSON contains the expected transform and term information + assertThat(bucketJson).contains("\"transform\" : \"bucket[8]\""); + assertThat(bucketJson).contains("\"term\" : \"id\""); + assertThat(dayJson).contains("\"transform\" : \"day\""); + assertThat(dayJson).contains("\"term\" : \"date\""); + assertThat(hourJson).contains("\"transform\" : \"hour\""); + assertThat(hourJson).contains("\"term\" : \"ts\""); + assertThat(truncateJson).contains("\"transform\" : \"truncate[4]\""); + assertThat(truncateJson).contains("\"term\" : \"data\""); + + // Test parsing back + Expression parsedBucket = ExpressionParser.fromJson(bucketJson, SCHEMA); + Expression parsedDay = ExpressionParser.fromJson(dayJson, SCHEMA); + Expression parsedHour = ExpressionParser.fromJson(hourJson, SCHEMA); + Expression parsedTruncate = ExpressionParser.fromJson(truncateJson, SCHEMA); + + // Verify equivalence after round-trip + assertThat(ExpressionUtil.equivalent(bucketExpr, parsedBucket, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(dayExpr, parsedDay, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(hourExpr, parsedHour, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(truncateExpr, parsedTruncate, STRUCT_TYPE, true)).isTrue(); + + // Test binding works correctly + Expression boundBucket = Binder.bind(STRUCT_TYPE, parsedBucket, true); + Expression boundDay = Binder.bind(STRUCT_TYPE, parsedDay, true); + Expression boundHour = Binder.bind(STRUCT_TYPE, parsedHour, true); + Expression boundTruncate = Binder.bind(STRUCT_TYPE, parsedTruncate, true); + + assertThat(boundBucket).isInstanceOf(BoundPredicate.class); + 
assertThat(boundDay).isInstanceOf(BoundPredicate.class); + assertThat(boundHour).isInstanceOf(BoundPredicate.class); + assertThat(boundTruncate).isInstanceOf(BoundPredicate.class); + + // Verify bound expressions reference correct field IDs + BoundPredicate<?> boundBucketPred = (BoundPredicate<?>) boundBucket; + BoundPredicate<?> boundDayPred = (BoundPredicate<?>) boundDay; + BoundPredicate<?> boundHourPred = (BoundPredicate<?>) boundHour; + BoundPredicate<?> boundTruncatePred = (BoundPredicate<?>) boundTruncate; + + assertThat(boundBucketPred.term()).isInstanceOf(BoundTransform.class); + assertThat(boundDayPred.term()).isInstanceOf(BoundTransform.class); + assertThat(boundHourPred.term()).isInstanceOf(BoundTransform.class); + assertThat(boundTruncatePred.term()).isInstanceOf(BoundTransform.class); + } + + @Test + public void testComplexExpressionsWithResolvedReferenceTransforms() { + // Test complex expressions combining transforms created from ResolvedReference + Expression complexExpr = Expressions.and( + Expressions.or( + Expressions.equal(Expressions.bucket(Expressions.ref("id", 100), 8), 3), + Expressions.equal(Expressions.day(Expressions.ref("date", 107)), "2023-01-15")), + Expressions.and( + Expressions.equal(Expressions.truncate(Expressions.ref("data", 101), 4), "test"), + Expressions.isNull(Expressions.ref("f", 105)))); + + // Test serialization of complex expression + String json = ExpressionParser.toJson(complexExpr, true); + + // Verify all transforms and terms are present in JSON + assertThat(json).contains("\"transform\" : \"bucket[8]\""); + assertThat(json).contains("\"transform\" : \"day\""); + assertThat(json).contains("\"transform\" : \"truncate[4]\""); + assertThat(json).contains("\"term\" : \"id\""); + assertThat(json).contains("\"term\" : \"date\""); + assertThat(json).contains("\"term\" : \"data\""); + assertThat(json).contains("\"term\" : \"f\""); + + // Test parsing back maintains structure + Expression parsed = 
ExpressionParser.fromJson(json, SCHEMA); + assertThat(parsed).isInstanceOf(And.class); + + // Test binding works correctly for the complex expression + Expression bound = Binder.bind(STRUCT_TYPE, parsed, true); + assertThat(bound).isInstanceOf(And.class); + + // Verify equivalence after round-trip + assertThat(ExpressionUtil.equivalent(complexExpr, parsed, STRUCT_TYPE, true)).isTrue(); + + // Test that the bound expression maintains correct structure + Expression originalBound = Binder.bind(STRUCT_TYPE, complexExpr, true); + assertThat(originalBound.toString()).isEqualTo(bound.toString()); + } + + @Test + public void testResolvedReferenceTransformCompatibilityWithNamedReference() { + // Test that transforms created with ResolvedReference are equivalent to those created with NamedReference + + // Create equivalent transforms using both approaches + Expression bucketWithResolved = Expressions.equal( + Expressions.bucket(Expressions.ref("id", 100), 8), 3); + Expression bucketWithNamed = Expressions.equal( + Expressions.bucket("id", 8), 3); + + Expression dayWithResolved = Expressions.equal( + Expressions.day(Expressions.ref("date", 107)), "2023-01-15"); + Expression dayWithNamed = Expressions.equal( + Expressions.day("date"), "2023-01-15"); + + // Test that both serialize to the same JSON + String resolvedBucketJson = ExpressionParser.toJson(bucketWithResolved, true); + String namedBucketJson = ExpressionParser.toJson(bucketWithNamed, true); + String resolvedDayJson = ExpressionParser.toJson(dayWithResolved, true); + String namedDayJson = ExpressionParser.toJson(dayWithNamed, true); + + assertThat(resolvedBucketJson).isEqualTo(namedBucketJson); + assertThat(resolvedDayJson).isEqualTo(namedDayJson); + + // Test that parsing back produces equivalent expressions + Expression parsedResolvedBucket = ExpressionParser.fromJson(resolvedBucketJson, SCHEMA); + Expression parsedNamedBucket = ExpressionParser.fromJson(namedBucketJson, SCHEMA); + Expression parsedResolvedDay = 
ExpressionParser.fromJson(resolvedDayJson, SCHEMA); + Expression parsedNamedDay = ExpressionParser.fromJson(namedDayJson, SCHEMA); + + // All should be equivalent + assertThat(ExpressionUtil.equivalent(bucketWithResolved, bucketWithNamed, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(dayWithResolved, dayWithNamed, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(parsedResolvedBucket, parsedNamedBucket, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(parsedResolvedDay, parsedNamedDay, STRUCT_TYPE, true)).isTrue(); + + // Test that binding produces identical results + Expression boundResolvedBucket = Binder.bind(STRUCT_TYPE, parsedResolvedBucket, true); + Expression boundNamedBucket = Binder.bind(STRUCT_TYPE, parsedNamedBucket, true); + Expression boundResolvedDay = Binder.bind(STRUCT_TYPE, parsedResolvedDay, true); + Expression boundNamedDay = Binder.bind(STRUCT_TYPE, parsedNamedDay, true); + + assertThat(boundResolvedBucket.toString()).isEqualTo(boundNamedBucket.toString()); + assertThat(boundResolvedDay.toString()).isEqualTo(boundNamedDay.toString()); + } + + @Test + public void testResolvedTransformPreservesFieldIdInformation() { + // Test that ResolvedTransform preserves field ID information through binding + // This is the key advantage over UnboundTransform with NamedReference + + // Create a transform expression using ResolvedReference + ResolvedReference<Long> idRef = Expressions.ref("id", 100); + Expression bucketExpr = Expressions.equal(Expressions.bucket(idRef, 8), 3); + + // Verify it's a ResolvedTransform + UnboundPredicate<?> predicate = (UnboundPredicate<?>) bucketExpr; + assertThat(predicate.term()).isInstanceOf(ResolvedTransform.class); + + ResolvedTransform<?, ?> resolvedTransform = (ResolvedTransform<?, ?>) predicate.term(); + + // Verify the ResolvedReference is preserved with field ID + assertThat(resolvedTransform.resolvedRef().fieldId()).isEqualTo(100); + 
assertThat(resolvedTransform.resolvedRef().name()).isEqualTo("id"); + + // Test binding works correctly with field ID information + Expression bound = Binder.bind(STRUCT_TYPE, bucketExpr, true); + assertThat(bound).isInstanceOf(BoundPredicate.class); + + BoundPredicate<?> boundPredicate = (BoundPredicate<?>) bound; + assertThat(boundPredicate.term()).isInstanceOf(BoundTransform.class); + + BoundTransform<?, ?> boundTransform = (BoundTransform<?, ?>) boundPredicate.term(); + assertThat(boundTransform.ref().fieldId()).isEqualTo(100); + assertThat(boundTransform.ref().name()).isEqualTo("id"); + + // Compare with equivalent NamedReference approach to show they bind to same field + Expression namedBucketExpr = Expressions.equal(Expressions.bucket("id", 8), 3); + Expression boundNamed = Binder.bind(STRUCT_TYPE, namedBucketExpr, true); + + // Both should resolve to the same field ID since they reference the same field + BoundPredicate<?> boundNamedPredicate = (BoundPredicate<?>) boundNamed; + BoundTransform<?, ?> boundNamedTransform = (BoundTransform<?, ?>) boundNamedPredicate.term(); + + assertThat(boundTransform.ref().fieldId()).isEqualTo(boundNamedTransform.ref().fieldId()); + assertThat(boundTransform.toString()).isEqualTo(boundNamedTransform.toString()); + } + + @Test + public void testResolvedTransformExpressionParserIntegration() { + // Test that expressions with ResolvedTransform integrate correctly with ExpressionParser + + // Create expressions using ResolvedTransform + Expression bucketExpr = Expressions.equal( + Expressions.bucket(Expressions.ref("id", 100), 8), 3); + Expression dayExpr = Expressions.equal( + Expressions.day(Expressions.ref("date", 107)), "2023-01-15"); + + // Test that they can be serialized (even though they become NamedReference in JSON) + String bucketJson = ExpressionParser.toJson(bucketExpr, true); + String dayJson = ExpressionParser.toJson(dayExpr, true); + + assertThat(bucketJson).contains("\"transform\" : \"bucket[8]\""); + 
assertThat(bucketJson).contains("\"term\" : \"id\""); + assertThat(dayJson).contains("\"transform\" : \"day\""); + assertThat(dayJson).contains("\"term\" : \"date\""); + + // Test parsing back (will create UnboundTransform with NamedReference) + Expression parsedBucket = ExpressionParser.fromJson(bucketJson, SCHEMA); + Expression parsedDay = ExpressionParser.fromJson(dayJson, SCHEMA); + + // The parsed expressions will have UnboundTransform (not ResolvedTransform) + // but they should still be functionally equivalent when bound + UnboundPredicate<?> parsedBucketPred = (UnboundPredicate<?>) parsedBucket; + UnboundPredicate<?> parsedDayPred = (UnboundPredicate<?>) parsedDay; + + assertThat(parsedBucketPred.term()).isInstanceOf(UnboundTransform.class); + assertThat(parsedDayPred.term()).isInstanceOf(UnboundTransform.class); + + // Both original and parsed should bind to the same fields + Expression originalBucketBound = Binder.bind(STRUCT_TYPE, bucketExpr, true); + Expression parsedBucketBound = Binder.bind(STRUCT_TYPE, parsedBucket, true); + Expression originalDayBound = Binder.bind(STRUCT_TYPE, dayExpr, true); + Expression parsedDayBound = Binder.bind(STRUCT_TYPE, parsedDay, true); + + assertThat(originalBucketBound.toString()).isEqualTo(parsedBucketBound.toString()); + assertThat(originalDayBound.toString()).isEqualTo(parsedDayBound.toString()); + + // Test equivalence + assertThat(ExpressionUtil.equivalent(bucketExpr, parsedBucket, STRUCT_TYPE, true)).isTrue(); + assertThat(ExpressionUtil.equivalent(dayExpr, parsedDay, STRUCT_TYPE, true)).isTrue(); + } + + @Test + public void testMixedResolvedTransformAndResolvedReferenceExpressions() { + // Test complex expressions mixing ResolvedTransform and direct ResolvedReference + Expression complexExpr = Expressions.and( + Expressions.equal(Expressions.bucket(Expressions.ref("id", 100), 8), 3), + Expressions.or( + Expressions.equal(Expressions.ref("data", 101), "test"), + 
Expressions.equal(Expressions.day(Expressions.ref("date", 107)), "2023-01-15"))); + + // Verify structure contains both ResolvedTransform and ResolvedReference + assertThat(complexExpr).isInstanceOf(And.class); + And andExpr = (And) complexExpr; + + // First part should be ResolvedTransform + UnboundPredicate<?> bucketPred = (UnboundPredicate<?>) andExpr.left(); + assertThat(bucketPred.term()).isInstanceOf(ResolvedTransform.class); + + // Second part is OR with ResolvedReference and ResolvedTransform + Or orExpr = (Or) andExpr.right(); + UnboundPredicate<?> dataPred = (UnboundPredicate<?>) orExpr.left(); + UnboundPredicate<?> dayPred = (UnboundPredicate<?>) orExpr.right(); + + assertThat(dataPred.term()).isInstanceOf(ResolvedReference.class); + assertThat(dayPred.term()).isInstanceOf(ResolvedTransform.class); + + // Test serialization and parsing + String json = ExpressionParser.toJson(complexExpr, true); + Expression parsed = ExpressionParser.fromJson(json, SCHEMA); + + // Test binding works correctly + Expression bound = Binder.bind(STRUCT_TYPE, parsed, true); + Expression originalBound = Binder.bind(STRUCT_TYPE, complexExpr, true); + + assertThat(bound.toString()).isEqualTo(originalBound.toString()); + assertThat(ExpressionUtil.equivalent(complexExpr, parsed, STRUCT_TYPE, true)).isTrue(); + } + + @Test + public void testBoundExpressionSerializationWithResolvedReference() { + // Test that bound expressions serialize to JSON with ResolvedReference terms when includeFieldIds=true + + // Create and bind simple expressions + Expression simpleExpr = Expressions.equal(Expressions.ref("id", 100), 42L); + Expression boundSimple = Binder.bind(STRUCT_TYPE, simpleExpr, true); + + // Test serialization without field IDs (existing behavior) + String jsonWithoutFieldIds = ExpressionParser.toJson(boundSimple, true); + assertThat(jsonWithoutFieldIds).contains("\"term\" : \"id\""); + assertThat(jsonWithoutFieldIds).doesNotContain("fieldId"); + + // Test serialization with field 
IDs (new behavior) + String jsonWithFieldIds = ExpressionParser.toJson(boundSimple, true, true); + assertThat(jsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(jsonWithFieldIds).contains("\"name\" : \"id\""); + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); + + // Test complex expressions + Expression complexExpr = Expressions.and( + Expressions.equal(Expressions.ref("data", 101), "test"), + Expressions.greaterThan(Expressions.ref("id", 100), 50L)); + Expression boundComplex = Binder.bind(STRUCT_TYPE, complexExpr, true); + + String complexJsonWithFieldIds = ExpressionParser.toJson(boundComplex, true, true); + assertThat(complexJsonWithFieldIds).contains("\"fieldId\" : 100"); + assertThat(complexJsonWithFieldIds).contains("\"fieldId\" : 101"); + assertThat(complexJsonWithFieldIds).contains("\"name\" : \"id\""); + assertThat(complexJsonWithFieldIds).contains("\"name\" : \"data\""); + } + + @Test + public void testBoundTransformExpressionSerializationWithResolvedReference() { + // Test that bound transform expressions serialize to JSON with ResolvedReference terms when includeFieldIds=true + + // Create transform expressions using ResolvedTransform + Expression bucketExpr = Expressions.equal( + Expressions.bucket(Expressions.ref("id", 100), 8), 3); + Expression dayExpr = Expressions.equal( + Expressions.day(Expressions.ref("date", 107)), "2023-01-15"); + + // Bind the expressions + Expression boundBucket = Binder.bind(STRUCT_TYPE, bucketExpr, true); + Expression boundDay = Binder.bind(STRUCT_TYPE, dayExpr, true); + + // Test serialization without field IDs (existing behavior) + String bucketJsonNoFieldIds = ExpressionParser.toJson(boundBucket, true); + assertThat(bucketJsonNoFieldIds).contains("\"transform\" : \"bucket[8]\""); + assertThat(bucketJsonNoFieldIds).contains("\"term\" : \"id\""); + assertThat(bucketJsonNoFieldIds).doesNotContain("fieldId"); + + // Test serialization with field IDs (new behavior) + String bucketJsonWithFieldIds = 
ExpressionParser.toJson(boundBucket, true, true); + assertThat(bucketJsonWithFieldIds).contains("\"transform\" : \"bucket[8]\""); + assertThat(bucketJsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(bucketJsonWithFieldIds).contains("\"name\" : \"id\""); + assertThat(bucketJsonWithFieldIds).contains("\"fieldId\" : 100"); + + String dayJsonWithFieldIds = ExpressionParser.toJson(boundDay, true, true); + assertThat(dayJsonWithFieldIds).contains("\"transform\" : \"day\""); + assertThat(dayJsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(dayJsonWithFieldIds).contains("\"name\" : \"date\""); + assertThat(dayJsonWithFieldIds).contains("\"fieldId\" : 107"); + } + + @Test + public void testComplexBoundTransformExpressionSerializationWithResolvedReference() { + // Test complex bound expressions with mixed transforms and references + Expression complexExpr = Expressions.and( + Expressions.or( + Expressions.equal(Expressions.bucket(Expressions.ref("id", 100), 8), 3), + Expressions.equal(Expressions.day(Expressions.ref("date", 107)), "2023-01-15")), + Expressions.and( + Expressions.equal(Expressions.truncate(Expressions.ref("data", 101), 4), "test"), + Expressions.isNull(Expressions.ref("f", 105)))); + + // Bind the complex expression + Expression bound = Binder.bind(STRUCT_TYPE, complexExpr, true); + + // Test serialization with field IDs + String jsonWithFieldIds = ExpressionParser.toJson(bound, true, true); + + // Verify all field IDs are present + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); // id field + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 107"); // date field + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 101"); // data field + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 105"); // f field + + // Verify all field names are present + assertThat(jsonWithFieldIds).contains("\"name\" : \"id\""); + assertThat(jsonWithFieldIds).contains("\"name\" : \"date\""); + 
assertThat(jsonWithFieldIds).contains("\"name\" : \"data\""); + assertThat(jsonWithFieldIds).contains("\"name\" : \"f\""); + + // Verify transforms are serialized correctly + assertThat(jsonWithFieldIds).contains("\"transform\" : \"bucket[8]\""); + assertThat(jsonWithFieldIds).contains("\"transform\" : \"day\""); + assertThat(jsonWithFieldIds).contains("\"transform\" : \"truncate[4]\""); + + // Verify ResolvedReference structure for both transforms and direct references + // Count occurrences of "type" : "ref" to ensure all references use ResolvedReference format + long refTypeCount = jsonWithFieldIds.lines() + .mapToLong(line -> { + int count = 0; + int index = 0; + String pattern = "\"type\" : \"ref\""; + while ((index = line.indexOf(pattern, index)) != -1) { + count++; + index += pattern.length(); + } + return count; + }) + .sum(); + + assertThat(refTypeCount).isEqualTo(4); // Should have 4 ResolvedReference objects + } + + @Test + public void testBoundExpressionRoundTripWithResolvedReference() { + // Test that bound expressions with ResolvedReference can be serialized and maintain information + + // Create expressions using both ResolvedTransform and ResolvedReference + Expression originalExpr = Expressions.and( + Expressions.equal(Expressions.bucket(Expressions.ref("id", 100), 8), 3), + Expressions.equal(Expressions.ref("data", 101), "test")); + + // Bind the expression + Expression bound = Binder.bind(STRUCT_TYPE, originalExpr, true); + + // Serialize with field IDs + String jsonWithFieldIds = ExpressionParser.toJson(bound, true, true); + + // Verify the JSON structure contains complete ResolvedReference information + assertThat(jsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); + assertThat(jsonWithFieldIds).contains("\"fieldId\" : 101"); + + // Note: The current parser doesn't support parsing ResolvedReference format back to expressions + // This test documents the serialization capability for 
external systems that need field ID information + + // Verify that the bound expression maintains the same field IDs after serialization + BoundPredicate<?> boundPred = (BoundPredicate<?>) ((And) bound).left(); + BoundTransform<?, ?> boundTransform = (BoundTransform<?, ?>) boundPred.term(); + assertThat(boundTransform.ref().fieldId()).isEqualTo(100); + + BoundPredicate<?> boundDataPred = (BoundPredicate<?>) ((And) bound).right(); + BoundReference<?> boundDataRef = (BoundReference<?>) boundDataPred.term(); + assertThat(boundDataRef.fieldId()).isEqualTo(101); + } + + @Test + public void testBoundExpressionFieldIdPreservationAcrossAllTypes() { + // Test that all data types preserve field IDs correctly in bound expression serialization + + // Create expressions for fields that support meaningful comparisons + Expression[] expressions = new Expression[] { + Expressions.equal(Expressions.ref("id", 100), 42L), + Expressions.equal(Expressions.ref("data", 101), "test"), + Expressions.equal(Expressions.ref("b", 102), true), + Expressions.equal(Expressions.ref("i", 103), 42), + Expressions.equal(Expressions.ref("l", 104), 42L), + Expressions.equal(Expressions.ref("f", 105), 3.14f), + Expressions.equal(Expressions.ref("d", 106), 3.14159), + Expressions.equal(Expressions.ref("s", 110), "test-string"), + Expressions.greaterThan(Expressions.ref("id", 100), 10L) + }; + + for (Expression expr : expressions) { + // Bind each expression + Expression bound = Binder.bind(STRUCT_TYPE, expr, true); + + // Serialize with field IDs + String json = ExpressionParser.toJson(bound, true, true); + + // Extract expected field ID from the original ResolvedReference + UnboundPredicate<?> unboundPred = (UnboundPredicate<?>) expr; + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) unboundPred.term(); + int expectedFieldId = resolvedRef.fieldId(); + + // Verify the field ID is preserved in the JSON + assertThat(json).contains("\"fieldId\" : " + expectedFieldId); + 
assertThat(json).contains("\"type\" : \"ref\""); + } + } +} \ No newline at end of file
