This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new b89842374a [core] Add support for IN predicate pushdown in ORC format 
(#6047)
b89842374a is described below

commit b89842374a7056870bd0f991cf696c7c6b4f4d1a
Author: xiaochen <[email protected]>
AuthorDate: Mon Aug 11 17:41:36 2025 +0800

    [core] Add support for IN predicate pushdown in ORC format (#6047)
---
 .../paimon/format/orc/filter/OrcFilters.java       | 29 +++++++++
 .../orc/filter/OrcPredicateFunctionVisitor.java    | 20 +++++-
 .../format/orc/filter/OrcFilterConverterTest.java  | 73 +++++++++++++++++++++-
 3 files changed, 118 insertions(+), 4 deletions(-)

diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcFilters.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcFilters.java
index 4227137bab..81036452e1 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcFilters.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcFilters.java
@@ -304,4 +304,33 @@ public class OrcFilters {
             return "AND(" + Arrays.toString(preds) + ")";
         }
     }
+
+    /**
+     * An IN predicate that can be evaluated by the OrcInputFormat: it is emitted as an {@code in}
+     * leaf on the ORC {@link SearchArgument.Builder} for a single column and a set of literals.
+     */
+    public static class In extends ColumnPredicate {
+        // Literal values of the IN list, kept exactly as passed to the constructor.
+        private final Object[] literals;
+
+        /**
+         * Creates an IN predicate.
+         *
+         * <p>The {@code literals} array is stored as-is; no defensive copy is made.
+         *
+         * @param columnName name of the column the predicate applies to
+         * @param literalType ORC type handed to the builder together with the literals
+         * @param literals values of the IN list
+         */
+        public In(String columnName, PredicateLeaf.Type literalType, Object... 
literals) {
+            super(columnName, literalType);
+            this.literals = literals;
+        }
+
+        /**
+         * Adds this predicate to {@code builder} as an {@code in} leaf and returns the builder.
+         *
+         * <p>NOTE(review): {@code castLiteral} is defined outside this hunk; presumably it aligns
+         * a value with {@code literalType} - confirm against its definition.
+         */
+        @Override
+        public SearchArgument.Builder add(SearchArgument.Builder builder) {
+            Object[] castedLiterals = new Object[literals.length];
+            for (int i = 0; i < literals.length; i++) {
+                // Only Serializable values go through castLiteral; anything else (including
+                // null, which is never an instance of Serializable) is forwarded unchanged.
+                if (literals[i] instanceof Serializable) {
+                    castedLiterals[i] = castLiteral((Serializable) 
literals[i]);
+                } else {
+                    castedLiterals[i] = literals[i];
+                }
+            }
+            return builder.in(columnName, literalType, castedLiterals);
+        }
+
+        /** Renders the predicate as {@code <column> IN [<literals>]} using the stored literals. */
+        @Override
+        public String toString() {
+            return columnName + " IN " + Arrays.toString(literals);
+        }
+    }
 }
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
index 935b482109..23612edd71 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
@@ -122,12 +122,28 @@ public class OrcPredicateFunctionVisitor
 
+    /**
+     * Converts an IN predicate into an {@link OrcFilters.In}.
+     *
+     * @return the ORC predicate, or {@link Optional#empty()} when the column type has no ORC
+     *     literal type or when any non-null literal cannot be converted
+     */
     @Override
     public Optional<OrcFilters.Predicate> visitIn(FieldRef fieldRef, 
List<Object> literals) {
-        return Optional.empty();
+        PredicateLeaf.Type colType = toOrcType(fieldRef.type());
+        if (colType == null) {
+            // toOrcType found no ORC literal type for this column: do not push down.
+            return Optional.empty();
+        }
+
+        Object[] orcLiterals = new Object[literals.size()];
+        for (int i = 0; i < literals.size(); i++) {
+            Object orcLiteral = toOrcObject(colType, literals.get(i));
+            if (orcLiteral == null && literals.get(i) != null) {
+                // toOrcObject returned null for a non-null literal, which is treated as a
+                // failed conversion: give up on the whole predicate rather than drop a value.
+                return Optional.empty();
+            }
+            // A null literal is kept as null in the converted array.
+            orcLiterals[i] = orcLiteral;
+        }
+
+        return Optional.of(new OrcFilters.In(fieldRef.name(), colType, 
orcLiterals));
     }
 
+    /**
+     * Converts a NOT IN predicate by wrapping the result of {@link #visitIn} in {@link
+     * OrcFilters.Not}.
+     *
+     * @return the negated ORC predicate, or {@link Optional#empty()} whenever {@code visitIn}
+     *     returns empty for the same arguments
+     */
     @Override
     public Optional<OrcFilters.Predicate> visitNotIn(FieldRef fieldRef, 
List<Object> literals) {
-        return Optional.empty();
+        Optional<OrcFilters.Predicate> inPredicate = visitIn(fieldRef, 
literals);
+        // map() leaves an empty Optional empty, so "no pushdown" propagates unchanged.
+        return inPredicate.map(OrcFilters.Not::new);
     }
 
     @Override
diff --git 
a/paimon-format/src/test/java/org/apache/paimon/format/orc/filter/OrcFilterConverterTest.java
 
b/paimon-format/src/test/java/org/apache/paimon/format/orc/filter/OrcFilterConverterTest.java
index c9a972f150..78bd01be96 100644
--- 
a/paimon-format/src/test/java/org/apache/paimon/format/orc/filter/OrcFilterConverterTest.java
+++ 
b/paimon-format/src/test/java/org/apache/paimon/format/orc/filter/OrcFilterConverterTest.java
@@ -53,6 +53,7 @@ import java.math.BigDecimal;
 import java.time.LocalDateTime;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
 import java.util.stream.LongStream;
@@ -109,7 +110,7 @@ public class OrcFilterConverterTest {
                                                 .collect(Collectors.toList()))
                                 .visit(OrcPredicateFunctionVisitor.VISITOR)
                                 .isPresent())
-                .isFalse();
+                .isTrue();
 
         assertThat(
                         builder.notIn(
@@ -119,7 +120,7 @@ public class OrcFilterConverterTest {
                                                 .collect(Collectors.toList()))
                                 .visit(OrcPredicateFunctionVisitor.VISITOR)
                                 .isPresent())
-                .isFalse();
+                .isTrue();
     }
 
     @ParameterizedTest
@@ -165,6 +166,74 @@ public class OrcFilterConverterTest {
                 builder.greaterOrEqual(0, tuple4.value),
                 new OrcFilters.Not(new OrcFilters.LessThan("fieldName", 
tuple4.type, tuple4.value)),
                 tuple4.canPushDown);
+
+        test(
+                builder.in(0, Collections.singletonList(tuple4.value)),
+                new OrcFilters.Equals("fieldName", tuple4.type, tuple4.value),
+                tuple4.canPushDown);
+
+        test(
+                builder.notIn(0, Collections.singletonList(tuple4.value)),
+                new OrcFilters.Not(new OrcFilters.Equals("fieldName", 
tuple4.type, tuple4.value)),
+                tuple4.canPushDown);
+    }
+
+    /**
+     * Checks the ORC predicates expected for IN / NOT IN over three BIGINT literals on a
+     * single-field row type.
+     */
+    @Test
+    public void testInPredicateWithMultipleValues() {
+        PredicateBuilder builder =
+                new PredicateBuilder(
+                        new RowType(
+                                Collections.singletonList(
+                                        new DataField(0, "testField", new 
BigIntType()))));
+
+        // IN with three values is expected as a left-nested OR of EQUALS:
+        // OR(OR(= 1, = 2), = 3). The original note says this applies to lists of <= 20 values;
+        // that threshold is not defined in this diff - confirm where the expansion happens.
+        test(
+                builder.in(0, Arrays.asList(1L, 2L, 3L)),
+                new OrcFilters.Or(
+                        new OrcFilters.Or(
+                                new OrcFilters.Equals("testField", 
PredicateLeaf.Type.LONG, 1L),
+                                new OrcFilters.Equals("testField", 
PredicateLeaf.Type.LONG, 2L)),
+                        new OrcFilters.Equals("testField", 
PredicateLeaf.Type.LONG, 3L)),
+                true);
+
+        // NOT IN with three values is expected as a left-nested AND of NOT(EQUALS):
+        // AND(AND(NOT(= 1), NOT(= 2)), NOT(= 3)).
+        test(
+                builder.notIn(0, Arrays.asList(1L, 2L, 3L)),
+                new OrcFilters.And(
+                        new OrcFilters.And(
+                                new OrcFilters.Not(
+                                        new OrcFilters.Equals(
+                                                "testField", 
PredicateLeaf.Type.LONG, 1L)),
+                                new OrcFilters.Not(
+                                        new OrcFilters.Equals(
+                                                "testField", 
PredicateLeaf.Type.LONG, 2L))),
+                        new OrcFilters.Not(
+                                new OrcFilters.Equals("testField", 
PredicateLeaf.Type.LONG, 3L))),
+                true);
+    }
+
+    /**
+     * Checks that IN / NOT IN over 21 BIGINT literals are expected as {@link OrcFilters.In} and
+     * {@code Not(In)} respectively, rather than as a chain of EQUALS.
+     */
+    @Test
+    public void testInPredicateWithManyValues() {
+        PredicateBuilder builder =
+                new PredicateBuilder(
+                        new RowType(
+                                Collections.singletonList(
+                                        new DataField(0, "testField", new 
BigIntType()))));
+
+        // Test IN with >20 values (should use real IN operation).
+        // range(1, 22) is end-exclusive, so this is the 21 values 1..21.
+        List<Object> manyValues = LongStream.range(1L, 
22L).boxed().collect(Collectors.toList());
+        test(
+                builder.in(0, manyValues),
+                new OrcFilters.In("testField", PredicateLeaf.Type.LONG, 
manyValues.toArray()),
+                true);
+
+        // Test NOT IN with >20 values (should use real NOT IN operation),
+        // i.e. the same In predicate wrapped in Not.
+        test(
+                builder.notIn(0, manyValues),
+                new OrcFilters.Not(
+                        new OrcFilters.In(
+                                "testField", PredicateLeaf.Type.LONG, 
manyValues.toArray())),
+                true);
     }
 
     private void test(Predicate predicate, OrcFilters.Predicate orcPredicate, 
boolean canPushDown) {

Reply via email to