This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 6d47aa4466 [core] introduce Between LeafFunction (#7209)
6d47aa4466 is described below
commit 6d47aa44662af739bdbad7399f7e215f5ab44f41
Author: Faiz <[email protected]>
AuthorDate: Thu Feb 5 10:38:21 2026 +0800
[core] introduce Between LeafFunction (#7209)
This PR introduces a Between leaf function. Previously, a between predicate
was implemented as a compound predicate combining GreaterOrEqual and
LessOrEqual, which is very inefficient for some index types (btree,
range bitmap and more).
Also introduces NotBetween, used internally as the negation of the Between
function.
---
.../globalindex/OffsetGlobalIndexReader.java | 5 +
.../paimon/globalindex/UnionGlobalIndexReader.java | 5 +
.../globalindex/btree/BTreeFileMetaSelector.java | 16 +++
.../paimon/globalindex/btree/BTreeIndexReader.java | 13 ++
.../globalindex/btree/LazyFilteredBTreeReader.java | 14 ++
.../java/org/apache/paimon/predicate/Between.java | 70 ++++++++++
.../apache/paimon/predicate/FunctionVisitor.java | 11 ++
.../org/apache/paimon/predicate/LeafFunction.java | 8 ++
.../paimon/predicate/LeafTernaryFunction.java | 64 +++++++++
.../org/apache/paimon/predicate/NotBetween.java | 72 ++++++++++
.../apache/paimon/predicate/PredicateBuilder.java | 19 ++-
.../globalindex/btree/AbstractIndexReaderTest.java | 152 ++++++++++++++++++++-
.../btree/BTreeFileMetaSelectorTest.java | 13 ++
.../globalindex/btree/BTreeIndexReaderTest.java | 147 +-------------------
.../btree/LazyFilteredBTreeIndexReaderTest.java | 146 +-------------------
.../SearchArgumentToPredicateConverterTest.java | 6 +-
16 files changed, 459 insertions(+), 302 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/OffsetGlobalIndexReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/OffsetGlobalIndexReader.java
index 5c3afecc7c..f1c3d154ed 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/OffsetGlobalIndexReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/OffsetGlobalIndexReader.java
@@ -111,6 +111,11 @@ public class OffsetGlobalIndexReader implements
GlobalIndexReader {
return applyOffset(wrapped.visitNotIn(fieldRef, literals));
}
+ @Override
+ public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object
from, Object to) {
+ return applyOffset(wrapped.visitBetween(fieldRef, from, to));
+ }
+
@Override
public Optional<GlobalIndexResult> visitVectorSearch(VectorSearch
vectorSearch) {
return applyOffset(
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/UnionGlobalIndexReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/UnionGlobalIndexReader.java
index 6e10e38f0a..a59ec6c6a4 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/UnionGlobalIndexReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/UnionGlobalIndexReader.java
@@ -108,6 +108,11 @@ public class UnionGlobalIndexReader implements
GlobalIndexReader {
return union(reader -> reader.visitNotIn(fieldRef, literals));
}
+ @Override
+ public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object
from, Object to) {
+ return union(reader -> reader.visitBetween(fieldRef, from, to));
+ }
+
@Override
public Optional<GlobalIndexResult> visitVectorSearch(VectorSearch
vectorSearch) {
return union(reader -> reader.visitVectorSearch(vectorSearch));
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelector.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelector.java
index b0f83a2686..a85b501587 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelector.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelector.java
@@ -178,6 +178,22 @@ public class BTreeFileMetaSelector implements
FunctionVisitor<Optional<List<Glob
return Optional.of(filter(meta -> true));
}
+ @Override
+ public Optional<List<GlobalIndexIOMeta>> visitBetween(
+ FieldRef fieldRef, Object from, Object to) {
+ return Optional.of(
+ filter(
+ meta -> {
+ if (meta.onlyNulls()) {
+ return false;
+ }
+ Object minKey = deserialize(meta.getFirstKey());
+ Object maxKey = deserialize(meta.getLastKey());
+ return comparator.compare(from, maxKey) <= 0
+ && comparator.compare(to, minKey) >= 0;
+ }));
+ }
+
@Override
public Optional<List<GlobalIndexIOMeta>> visitAnd(
List<Optional<List<GlobalIndexIOMeta>>> children) {
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
index 69cf7ceaa4..068cf9993b 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
@@ -342,6 +342,19 @@ public class BTreeIndexReader implements GlobalIndexReader
{
}));
}
+ @Override
+ public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object
from, Object to) {
+ return Optional.of(
+ GlobalIndexResult.create(
+ () -> {
+ try {
+ return rangeQuery(from, to, true, true);
+ } catch (IOException ioe) {
+ throw new RuntimeException("fail to read btree
index file.", ioe);
+ }
+ }));
+ }
+
private RoaringNavigableMap64 allNonNullRows() throws IOException {
// Traverse all data to avoid returning null values, which is very
advantageous in
// situations where there are many null values
diff --git
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
index 9ffa60d909..f5badf1a70 100644
---
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
+++
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java
@@ -249,6 +249,20 @@ public class LazyFilteredBTreeReader implements
GlobalIndexReader {
return createUnionReader(selected).visitNotIn(fieldRef, literals);
}
+ @Override
+ public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object
from, Object to) {
+ Optional<List<GlobalIndexIOMeta>> selectedOpt =
+ fileSelector.visitBetween(fieldRef, from, to);
+ if (!selectedOpt.isPresent()) {
+ return Optional.empty();
+ }
+ List<GlobalIndexIOMeta> selected = selectedOpt.get();
+ if (selected.isEmpty()) {
+ return Optional.of(GlobalIndexResult.createEmpty());
+ }
+ return createUnionReader(selected).visitBetween(fieldRef, from, to);
+ }
+
/**
* Create a Union Reader for given files. The union reader is composed by
readers from reader
* cache, so please do not close it.
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/Between.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/Between.java
new file mode 100644
index 0000000000..1829efe09e
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/Between.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.predicate;
+
+import org.apache.paimon.types.DataType;
+
+import
org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
+
+import java.util.List;
+import java.util.Optional;
+
+import static org.apache.paimon.predicate.CompareUtils.compareLiteral;
+
+/** The {@link LeafFunction} to eval between. */
+public class Between extends LeafTernaryFunction {
+
+ private static final long serialVersionUID = 1L;
+
+ public static final String NAME = "BETWEEN";
+
+ public static final Between INSTANCE = new Between();
+
+ @JsonCreator
+ public Between() {}
+
+ @Override
+ public boolean test(DataType type, Object field, Object literal1, Object
literal2) {
+ return compareLiteral(type, literal1, field) <= 0
+ && compareLiteral(type, literal2, field) >= 0;
+ }
+
+ @Override
+ public boolean test(
+ DataType type,
+ long rowCount,
+ Object min,
+ Object max,
+ Long nullCount,
+ Object literal1,
+ Object literal2) {
+ // true if [min, max] and [l(0), l(1)] have intersection
+ return compareLiteral(type, literal1, max) <= 0 &&
compareLiteral(type, literal2, min) >= 0;
+ }
+
+ @Override
+ public Optional<LeafFunction> negate() {
+ return Optional.of(NotBetween.INSTANCE);
+ }
+
+ @Override
+ public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef,
List<Object> literals) {
+ return visitor.visitBetween(fieldRef, literals.get(0),
literals.get(1));
+ }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
index 3edca0a459..5aa4ca1373 100644
---
a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
+++
b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
@@ -18,6 +18,7 @@
package org.apache.paimon.predicate;
+import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -81,6 +82,16 @@ public interface FunctionVisitor<T> extends
PredicateVisitor<T> {
T visitNotIn(FieldRef fieldRef, List<Object> literals);
+ default T visitBetween(FieldRef fieldRef, Object from, Object to) {
+ return visitAnd(
+ Arrays.asList(visitGreaterOrEqual(fieldRef, from),
visitLessOrEqual(fieldRef, to)));
+ }
+
+ default T visitNotBetween(FieldRef fieldRef, Object from, Object to) {
+ return visitOr(
+ Arrays.asList(visitLessThan(fieldRef, from),
visitGreaterThan(fieldRef, to)));
+ }
+
// ----------------- Compound functions ------------------------
T visitAnd(List<T> children);
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/LeafFunction.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/LeafFunction.java
index d3d0ba9eb9..3b639b1439 100644
--- a/paimon-common/src/main/java/org/apache/paimon/predicate/LeafFunction.java
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/LeafFunction.java
@@ -62,6 +62,10 @@ public abstract class LeafFunction implements Serializable {
return In.INSTANCE;
case NotIn.NAME:
return NotIn.INSTANCE;
+ case Between.NAME:
+ return Between.INSTANCE;
+ case NotBetween.NAME:
+ return NotBetween.INSTANCE;
default:
throw new IllegalArgumentException(
"Could not resolve leaf function '" + name + "'");
@@ -98,6 +102,10 @@ public abstract class LeafFunction implements Serializable {
return In.NAME;
} else if (this instanceof NotIn) {
return NotIn.NAME;
+ } else if (this instanceof Between) {
+ return Between.NAME;
+ } else if (this instanceof NotBetween) {
+ return NotBetween.NAME;
} else {
throw new IllegalArgumentException(
"Unknown leaf function class for JSON serialization: " +
getClass());
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/LeafTernaryFunction.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/LeafTernaryFunction.java
new file mode 100644
index 0000000000..9c027b3833
--- /dev/null
+++
b/paimon-common/src/main/java/org/apache/paimon/predicate/LeafTernaryFunction.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.predicate;
+
+import org.apache.paimon.types.DataType;
+
+import java.util.List;
+
+/** Abstract {@link LeafFunction} for ternary function. */
+public abstract class LeafTernaryFunction extends LeafFunction {
+
+ public abstract boolean test(DataType type, Object field, Object literal1,
Object literal2);
+
+ public abstract boolean test(
+ DataType type,
+ long rowCount,
+ Object min,
+ Object max,
+ Long nullCount,
+ Object literal1,
+ Object literal2);
+
+ @Override
+ public boolean test(DataType type, Object field, List<Object> literals) {
+ if (field == null || literals.get(0) == null || literals.get(1) ==
null) {
+ return false;
+ }
+
+ return test(type, field, literals.get(0), literals.get(1));
+ }
+
+ @Override
+ public boolean test(
+ DataType type,
+ long rowCount,
+ Object min,
+ Object max,
+ Long nullCount,
+ List<Object> literals) {
+ if (nullCount != null) {
+ if (rowCount == nullCount || literals.get(0) == null ||
literals.get(1) == null) {
+ return false;
+ }
+ }
+
+ return test(type, rowCount, min, max, nullCount, literals.get(0),
literals.get(1));
+ }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/NotBetween.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/NotBetween.java
new file mode 100644
index 0000000000..d92dbe0ec5
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/NotBetween.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.predicate;
+
+import org.apache.paimon.types.DataType;
+
+import
org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
+
+import java.util.List;
+import java.util.Optional;
+
+import static org.apache.paimon.predicate.CompareUtils.compareLiteral;
+
+/**
+ * The {@link LeafFunction} for not between. Now this is just an internal
function as the negation
+ * of {@link Between}.
+ */
+public class NotBetween extends LeafTernaryFunction {
+
+ private static final long serialVersionUID = 1L;
+
+ public static final String NAME = "NOT_BETWEEN";
+
+ public static final NotBetween INSTANCE = new NotBetween();
+
+ @JsonCreator
+ public NotBetween() {}
+
+ @Override
+ public boolean test(DataType type, Object field, Object literal1, Object
literal2) {
+ return compareLiteral(type, literal1, field) > 0
+ || compareLiteral(type, literal2, field) < 0;
+ }
+
+ @Override
+ public boolean test(
+ DataType type,
+ long rowCount,
+ Object min,
+ Object max,
+ Long nullCount,
+ Object literal1,
+ Object literal2) {
+ return compareLiteral(type, literal1, min) > 0 || compareLiteral(type,
literal2, max) < 0;
+ }
+
+ @Override
+ public Optional<LeafFunction> negate() {
+ return Optional.of(Between.INSTANCE);
+ }
+
+ @Override
+ public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef,
List<Object> literals) {
+ return visitor.visitNotBetween(fieldRef, literals.get(0),
literals.get(1));
+ }
+}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
index bee824769d..f410551e29 100644
---
a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
+++
b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
@@ -228,20 +228,19 @@ public class PredicateBuilder {
}
public Predicate between(int idx, Object includedLowerBound, Object
includedUpperBound) {
- return new CompoundPredicate(
- And.INSTANCE,
- Arrays.asList(
- greaterOrEqual(idx, includedLowerBound),
- lessOrEqual(idx, includedUpperBound)));
+ DataField field = rowType.getFields().get(idx);
+ return new LeafPredicate(
+ Between.INSTANCE,
+ field.type(),
+ idx,
+ field.name(),
+ Arrays.asList(includedLowerBound, includedUpperBound));
}
public Predicate between(
Transform transform, Object includedLowerBound, Object
includedUpperBound) {
- return new CompoundPredicate(
- And.INSTANCE,
- Arrays.asList(
- greaterOrEqual(transform, includedLowerBound),
- lessOrEqual(transform, includedUpperBound)));
+ return new LeafPredicate(
+ transform, Between.INSTANCE, Arrays.asList(includedLowerBound,
includedUpperBound));
}
public static Predicate and(Predicate... predicates) {
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
index 8a4c37437c..23b8d77bf2 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/AbstractIndexReaderTest.java
@@ -27,6 +27,7 @@ import org.apache.paimon.fs.PositionOutputStream;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexParallelWriter;
+import org.apache.paimon.globalindex.GlobalIndexReader;
import org.apache.paimon.globalindex.GlobalIndexResult;
import org.apache.paimon.globalindex.ResultEntry;
import org.apache.paimon.globalindex.io.GlobalIndexFileReader;
@@ -34,6 +35,8 @@ import org.apache.paimon.globalindex.io.GlobalIndexFileWriter;
import org.apache.paimon.io.cache.CacheManager;
import org.apache.paimon.options.MemorySize;
import org.apache.paimon.options.Options;
+import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.testutils.junit.parameterized.Parameters;
import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.BooleanType;
import org.apache.paimon.types.CharType;
@@ -55,14 +58,20 @@ import org.apache.paimon.utils.DecimalUtils;
import org.apache.paimon.utils.Pair;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.io.TempDir;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
+import java.util.Objects;
import java.util.Random;
+import java.util.TreeSet;
import java.util.UUID;
import java.util.function.Predicate;
import java.util.stream.Collectors;
@@ -70,7 +79,7 @@ import java.util.stream.Collectors;
import static org.assertj.core.api.Assertions.assertThat;
/** Common test class for BTreeIndexReader. */
-public class AbstractIndexReaderTest {
+public abstract class AbstractIndexReaderTest {
protected static final CacheManager CACHE_MANAGER = new
CacheManager(MemorySize.VALUE_8_MB);
protected DataType dataType;
@@ -91,6 +100,25 @@ public class AbstractIndexReaderTest {
this.dataNum = (Integer) args.get(1);
}
+ @SuppressWarnings("unused")
+ @Parameters(name = "dataType&recordNum-{0}")
+ public static List<List<Object>> getVarSeg() {
+ return Arrays.asList(
+ Arrays.asList(new IntType(), 10000),
+ Arrays.asList(new VarCharType(VarCharType.MAX_LENGTH), 10000),
+ Arrays.asList(new CharType(100), 10000),
+ Arrays.asList(new FloatType(), 10000),
+ Arrays.asList(new DecimalType(), 10000),
+ Arrays.asList(new DoubleType(), 10000),
+ Arrays.asList(new BooleanType(), 10000),
+ Arrays.asList(new TinyIntType(), 10000),
+ Arrays.asList(new SmallIntType(), 10000),
+ Arrays.asList(new BigIntType(), 10000),
+ Arrays.asList(new DateType(), 10000),
+ Arrays.asList(new TimestampType(), 10000));
+ }
+
+ @BeforeEach
public void setUp() throws Exception {
fileIO = LocalFileIO.create();
fileWriter =
@@ -126,6 +154,128 @@ public class AbstractIndexReaderTest {
data.sort((p1, p2) -> comparator.compare(p1.getKey(), p2.getKey()));
}
+ @TestTemplate
+ public void testRangePredicate() throws Exception {
+ FieldRef ref = new FieldRef(1, "testField", dataType);
+
+ try (GlobalIndexReader reader = prepareDataAndCreateReader()) {
+ GlobalIndexResult result;
+ Random random = new Random();
+
+ for (int i = 0; i < 5; i++) {
+ int literalIdx = random.nextInt(dataNum);
+ Object literal = data.get(literalIdx).getKey();
+
+ // 1. test <= literal
+ result = reader.visitLessOrEqual(ref, literal).get();
+ assertResult(result, filter(obj -> comparator.compare(obj,
literal) <= 0));
+
+ // 2. test < literal
+ result = reader.visitLessThan(ref, literal).get();
+ assertResult(result, filter(obj -> comparator.compare(obj,
literal) < 0));
+
+ // 3. test >= literal
+ result = reader.visitGreaterOrEqual(ref, literal).get();
+ assertResult(result, filter(obj -> comparator.compare(obj,
literal) >= 0));
+
+ // 4. test > literal
+ result = reader.visitGreaterThan(ref, literal).get();
+ assertResult(result, filter(obj -> comparator.compare(obj,
literal) > 0));
+
+ // 5. test equal
+ result = reader.visitEqual(ref, literal).get();
+ assertResult(result, filter(obj -> comparator.compare(obj,
literal) == 0));
+
+ // 6. test not equal
+ result = reader.visitNotEqual(ref, literal).get();
+ assertResult(result, filter(obj -> comparator.compare(obj,
literal) != 0));
+
+ // 7. test between
+ int betweenOffset = random.nextInt(dataNum - literalIdx);
+ Object toLiteral = data.get(literalIdx +
betweenOffset).getKey();
+ result = reader.visitBetween(ref, literal, toLiteral).get();
+ assertResult(
+ result,
+ filter(
+ obj ->
+ comparator.compare(obj, toLiteral) <= 0
+ && comparator.compare(obj,
literal) >= 0));
+ }
+
+ // 8. test < min
+ Object literal7 = data.get(0).getKey();
+ result = reader.visitLessThan(ref, literal7).get();
+ Assertions.assertTrue(result.results().isEmpty());
+
+ // 9. test > max
+ Object literal8 = data.get(dataNum - 1).getKey();
+ result = reader.visitGreaterThan(ref, literal8).get();
+ Assertions.assertTrue(result.results().isEmpty());
+
+ // 10. test between
+ if (dataType instanceof IntType) {
+ Integer min = (Integer) (data.get(0).getKey());
+ Integer max = (Integer) (data.get(dataNum - 1).getKey());
+
+ result = reader.visitBetween(ref, min - 100, min - 1).get();
+ Assertions.assertTrue(result.results().isEmpty());
+
+ result = reader.visitBetween(ref, max + 1, max + 100).get();
+ Assertions.assertTrue(result.results().isEmpty());
+ }
+ }
+ }
+
+ @TestTemplate
+ public void testIsNull() throws Exception {
+ // set nulls
+ // make sure that there will be some btree file only containing nulls.
+ for (int i = dataNum - 1; i >= dataNum * 0.85; i--) {
+ data.get(i).setLeft(null);
+ }
+
+ FieldRef ref = new FieldRef(1, "testField", dataType);
+
+ try (GlobalIndexReader reader = prepareDataAndCreateReader()) {
+ GlobalIndexResult result;
+
+ result = reader.visitIsNull(ref).get();
+ assertResult(result, filter(Objects::isNull));
+
+ result = reader.visitIsNotNull(ref).get();
+ assertResult(result, filter(Objects::nonNull));
+ }
+ }
+
+ @TestTemplate
+ public void testInPredicate() throws Exception {
+ FieldRef ref = new FieldRef(1, "testField", dataType);
+
+ try (GlobalIndexReader reader = prepareDataAndCreateReader()) {
+ GlobalIndexResult result;
+ for (int i = 0; i < 10; i++) {
+ Random random = new Random(System.currentTimeMillis());
+ List<Object> literals =
+
data.stream().map(Pair::getKey).collect(Collectors.toList());
+ Collections.shuffle(literals, random);
+ literals = literals.subList(0, (int) (dataNum * 0.1));
+
+ TreeSet<Object> set = new TreeSet<>(comparator);
+ set.addAll(literals);
+
+ // 1. test in
+ result = reader.visitIn(ref, literals).get();
+ assertResult(result, filter(set::contains));
+
+ // 2. test not in
+ result = reader.visitNotIn(ref, literals).get();
+ assertResult(result, filter(obj -> !set.contains(obj)));
+ }
+ }
+ }
+
+ protected abstract GlobalIndexReader prepareDataAndCreateReader() throws
Exception;
+
protected GlobalIndexIOMeta writeData(List<Pair<Object, Long>> data)
throws IOException {
GlobalIndexParallelWriter indexWriter =
globalIndexer.createWriter(fileWriter);
for (Pair<Object, Long> pair : data) {
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
index 394f3dacb5..26a13eeec5 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelectorTest.java
@@ -143,6 +143,19 @@ public class BTreeFileMetaSelectorTest {
result = selector.visitNotIn(ref, Arrays.asList(1, 7, 19, 30, 31));
Assertions.assertThat(result).isNotEmpty();
assertFiles(result.get(), Arrays.asList("file1", "file2", "file3",
"file4", "file5"));
+
+ // 4. test between
+ result = selector.visitBetween(ref, 0, 15);
+ Assertions.assertThat(result).isNotEmpty();
+ assertFiles(result.get(), Arrays.asList("file1", "file2", "file4"));
+
+ result = selector.visitBetween(ref, 0, 30);
+ Assertions.assertThat(result).isNotEmpty();
+ assertFiles(result.get(), Arrays.asList("file1", "file2", "file3",
"file4", "file5"));
+
+ result = selector.visitBetween(ref, 40, 50);
+ Assertions.assertThat(result).isNotEmpty();
+ Assertions.assertThat(result.get()).isEmpty();
}
private void assertFiles(List<GlobalIndexIOMeta> files, List<String>
expected) {
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexReaderTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexReaderTest.java
index 74c5eb1658..e2c028286f 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexReaderTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/BTreeIndexReaderTest.java
@@ -20,36 +20,11 @@ package org.apache.paimon.globalindex.btree;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexReader;
-import org.apache.paimon.globalindex.GlobalIndexResult;
-import org.apache.paimon.predicate.FieldRef;
import
org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
-import org.apache.paimon.testutils.junit.parameterized.Parameters;
-import org.apache.paimon.types.BigIntType;
-import org.apache.paimon.types.BooleanType;
-import org.apache.paimon.types.CharType;
-import org.apache.paimon.types.DateType;
-import org.apache.paimon.types.DecimalType;
-import org.apache.paimon.types.DoubleType;
-import org.apache.paimon.types.FloatType;
-import org.apache.paimon.types.IntType;
-import org.apache.paimon.types.SmallIntType;
-import org.apache.paimon.types.TimestampType;
-import org.apache.paimon.types.TinyIntType;
-import org.apache.paimon.types.VarCharType;
-import org.apache.paimon.utils.Pair;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;
-import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
-import java.util.Objects;
-import java.util.Random;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
/** Test for {@link BTreeIndexReader} to read a single file. */
@ExtendWith(ParameterizedTestExtension.class)
@@ -59,126 +34,10 @@ public class BTreeIndexReaderTest extends
AbstractIndexReaderTest {
super(args);
}
- @SuppressWarnings("unused")
- @Parameters(name = "dataType&recordNum-{0}")
- public static List<List<Object>> getVarSeg() {
- return Arrays.asList(
- Arrays.asList(new IntType(), 10000),
- Arrays.asList(new VarCharType(VarCharType.MAX_LENGTH), 10000),
- Arrays.asList(new CharType(100), 10000),
- Arrays.asList(new FloatType(), 10000),
- Arrays.asList(new DecimalType(), 10000),
- Arrays.asList(new DoubleType(), 10000),
- Arrays.asList(new BooleanType(), 10000),
- Arrays.asList(new TinyIntType(), 10000),
- Arrays.asList(new SmallIntType(), 10000),
- Arrays.asList(new BigIntType(), 10000),
- Arrays.asList(new DateType(), 10000),
- Arrays.asList(new TimestampType(), 10000));
- }
-
- @BeforeEach
- public void setUp() throws Exception {
- super.setUp();
- }
-
- @TestTemplate
- public void testRangePredicate() throws Exception {
- GlobalIndexIOMeta written = writeData(data);
- FieldRef ref = new FieldRef(1, "testField", dataType);
-
- try (GlobalIndexReader reader =
- new BTreeIndexReader(keySerializer, fileReader, written,
CACHE_MANAGER)) {
- GlobalIndexResult result;
- Random random = new Random();
-
- for (int i = 0; i < 5; i++) {
- Object literal = data.get(random.nextInt(dataNum)).getKey();
-
- // 1. test <= literal
- result = reader.visitLessOrEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj,
literal) <= 0));
-
- // 2. test < literal
- result = reader.visitLessThan(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj,
literal) < 0));
-
- // 3. test >= literal
- result = reader.visitGreaterOrEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj,
literal) >= 0));
-
- // 4. test > literal
- result = reader.visitGreaterThan(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj,
literal) > 0));
-
- // 5. test equal
- result = reader.visitEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj,
literal) == 0));
-
- // 6. test not equal
- result = reader.visitNotEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj,
literal) != 0));
- }
-
- // 7. test < min
- Object literal7 = data.get(0).getKey();
- result = reader.visitLessThan(ref, literal7).get();
- Assertions.assertTrue(result.results().isEmpty());
-
- // 8. test > max
- Object literal8 = data.get(dataNum - 1).getKey();
- result = reader.visitGreaterThan(ref, literal8).get();
- Assertions.assertTrue(result.results().isEmpty());
- }
- }
-
- @TestTemplate
- public void testIsNull() throws Exception {
- // set nulls
- for (int i = dataNum - 1; i >= dataNum * 0.9; i--) {
- data.get(i).setLeft(null);
- }
+ @Override
+ protected GlobalIndexReader prepareDataAndCreateReader() throws Exception {
GlobalIndexIOMeta written = writeData(data);
- FieldRef ref = new FieldRef(1, "testField", dataType);
-
- try (GlobalIndexReader reader =
- new BTreeIndexReader(keySerializer, fileReader, written,
CACHE_MANAGER)) {
- GlobalIndexResult result;
-
- result = reader.visitIsNull(ref).get();
- assertResult(result, filter(Objects::isNull));
-
- result = reader.visitIsNotNull(ref).get();
- assertResult(result, filter(Objects::nonNull));
- }
- }
-
- @TestTemplate
- public void testInPredicate() throws Exception {
- GlobalIndexIOMeta written = writeData(data);
- FieldRef ref = new FieldRef(1, "testField", dataType);
-
- try (GlobalIndexReader reader =
- new BTreeIndexReader(keySerializer, fileReader, written,
CACHE_MANAGER)) {
- GlobalIndexResult result;
- for (int i = 0; i < 10; i++) {
- Random random = new Random(System.currentTimeMillis());
- List<Object> literals =
-
data.stream().map(Pair::getKey).collect(Collectors.toList());
- Collections.shuffle(literals, random);
- literals = literals.subList(0, (int) (dataNum * 0.1));
-
- TreeSet<Object> set = new TreeSet<>(comparator);
- set.addAll(literals);
-
- // 1. test in
- result = reader.visitIn(ref, literals).get();
- assertResult(result, filter(set::contains));
- // 2. test not in
- result = reader.visitNotIn(ref, literals).get();
- assertResult(result, filter(obj -> !set.contains(obj)));
- }
- }
+ return new BTreeIndexReader(keySerializer, fileReader, written,
CACHE_MANAGER);
}
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
index 0dfcb712e6..d3141c58fb 100644
---
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
@@ -20,37 +20,12 @@ package org.apache.paimon.globalindex.btree;
import org.apache.paimon.globalindex.GlobalIndexIOMeta;
import org.apache.paimon.globalindex.GlobalIndexReader;
-import org.apache.paimon.globalindex.GlobalIndexResult;
-import org.apache.paimon.predicate.FieldRef;
import org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
-import org.apache.paimon.testutils.junit.parameterized.Parameters;
-import org.apache.paimon.types.BigIntType;
-import org.apache.paimon.types.BooleanType;
-import org.apache.paimon.types.CharType;
-import org.apache.paimon.types.DateType;
-import org.apache.paimon.types.DecimalType;
-import org.apache.paimon.types.DoubleType;
-import org.apache.paimon.types.FloatType;
-import org.apache.paimon.types.IntType;
-import org.apache.paimon.types.SmallIntType;
-import org.apache.paimon.types.TimestampType;
-import org.apache.paimon.types.TinyIntType;
-import org.apache.paimon.types.VarCharType;
-import org.apache.paimon.utils.Pair;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
-import java.util.Objects;
-import java.util.Random;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
/** Test for {@link LazyFilteredBTreeReader} to read multiple files. */
@ExtendWith(ParameterizedTestExtension.class)
@@ -60,126 +35,11 @@ public class LazyFilteredBTreeIndexReaderTest extends AbstractIndexReaderTest {
super(args);
}
- @SuppressWarnings("unused")
- @Parameters(name = "dataType&recordNum-{0}")
- public static List<List<Object>> getVarSeg() {
- return Arrays.asList(
- Arrays.asList(new IntType(), 10000),
- Arrays.asList(new VarCharType(VarCharType.MAX_LENGTH), 10000),
- Arrays.asList(new CharType(100), 10000),
- Arrays.asList(new FloatType(), 10000),
- Arrays.asList(new DecimalType(), 10000),
- Arrays.asList(new DoubleType(), 10000),
- Arrays.asList(new BooleanType(), 10000),
- Arrays.asList(new TinyIntType(), 10000),
- Arrays.asList(new SmallIntType(), 10000),
- Arrays.asList(new BigIntType(), 10000),
- Arrays.asList(new DateType(), 10000),
- Arrays.asList(new TimestampType(), 10000));
- }
-
- @BeforeEach
- public void setUp() throws Exception {
- super.setUp();
- }
-
- @TestTemplate
- public void testRangePredicate() throws Exception {
- List<GlobalIndexIOMeta> written = writeData();
- FieldRef ref = new FieldRef(1, "testField", dataType);
-
- try (GlobalIndexReader reader = globalIndexer.createReader(fileReader, written)) {
- GlobalIndexResult result;
- Random random = new Random();
-
- for (int i = 0; i < 5; i++) {
- org.apache.paimon.utils.Pair<Object, Long> pair = data.get(random.nextInt(dataNum));
- Object literal = pair.getLeft();
-
- // 1. test <= literal
- result = reader.visitLessOrEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj, literal) <= 0));
-
- // 2. test < literal
- result = reader.visitLessThan(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj, literal) < 0));
-
- // 3. test >= literal
- result = reader.visitGreaterOrEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj, literal) >= 0));
-
- // 4. test > literal
- result = reader.visitGreaterThan(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj, literal) > 0));
-
- // 5. test equal
- result = reader.visitEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj, literal) == 0));
-
- // 6. test not equal
- result = reader.visitNotEqual(ref, literal).get();
- assertResult(result, filter(obj -> comparator.compare(obj, literal) != 0));
- }
-
- // 7. test < min
- Object literal7 = data.get(0).getKey();
- result = reader.visitLessThan(ref, literal7).get();
- Assertions.assertTrue(result.results().isEmpty());
-
- // 8. test > max
- Object literal8 = data.get(dataNum - 1).getKey();
- result = reader.visitGreaterThan(ref, literal8).get();
- Assertions.assertTrue(result.results().isEmpty());
- }
- }
-
- @TestTemplate
- public void testIsNull() throws Exception {
- // set nulls
- // make sure that there will be some btree file only containing nulls.
- for (int i = dataNum - 1; i >= dataNum * 0.85; i--) {
- data.get(i).setLeft(null);
- }
+ @Override
+ protected GlobalIndexReader prepareDataAndCreateReader() throws Exception {
List<GlobalIndexIOMeta> written = writeData();
- FieldRef ref = new FieldRef(1, "testField", dataType);
- try (GlobalIndexReader reader = globalIndexer.createReader(fileReader, written)) {
- GlobalIndexResult result;
-
- result = reader.visitIsNull(ref).get();
- assertResult(result, filter(Objects::isNull));
-
- result = reader.visitIsNotNull(ref).get();
- assertResult(result, filter(Objects::nonNull));
- }
- }
-
- @TestTemplate
- public void testInPredicate() throws Exception {
- List<GlobalIndexIOMeta> written = writeData();
- FieldRef ref = new FieldRef(1, "testField", dataType);
-
- try (GlobalIndexReader reader = globalIndexer.createReader(fileReader, written)) {
- GlobalIndexResult result;
- for (int i = 0; i < 10; i++) {
- Random random = new Random(System.currentTimeMillis());
- List<Object> literals =
- data.stream().map(Pair::getKey).collect(Collectors.toList());
- Collections.shuffle(literals, random);
- literals = literals.subList(0, (int) (dataNum * 0.1));
-
- TreeSet<Object> set = new TreeSet<>(comparator);
- set.addAll(literals);
-
- // 1. test in
- result = reader.visitIn(ref, literals).get();
- assertResult(result, filter(set::contains));
-
- // 2. test not in
- result = reader.visitNotIn(ref, literals).get();
- assertResult(result, filter(obj -> !set.contains(obj)));
- }
- }
+ return globalIndexer.createReader(fileReader, written);
}
private List<GlobalIndexIOMeta> writeData() throws Exception {
diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/SearchArgumentToPredicateConverterTest.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/SearchArgumentToPredicateConverterTest.java
index 7599cf127d..a1cdf8c361 100644
--- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/SearchArgumentToPredicateConverterTest.java
+++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/SearchArgumentToPredicateConverterTest.java
@@ -254,8 +254,7 @@ public class SearchArgumentToPredicateConverterTest {
SearchArgument.Builder builder = SearchArgumentFactory.newBuilder();
SearchArgument sarg =
builder.between("f_bigint", PredicateLeaf.Type.LONG, 100L, 200L).build();
- Predicate expected =
- PredicateBuilder.and(BUILDER.greaterOrEqual(1, 100L), BUILDER.lessOrEqual(1, 200L));
+ Predicate expected = BUILDER.between(1, 100L, 200L);
assertExpected(sarg, expected);
}
@@ -267,8 +266,7 @@ public class SearchArgumentToPredicateConverterTest {
.between("f_bigint", PredicateLeaf.Type.LONG, 100L, 200L)
.end()
.build();
- Predicate expected =
- PredicateBuilder.or(BUILDER.lessThan(1, 100L), BUILDER.greaterThan(1, 200L));
+ Predicate expected = BUILDER.between(1, 100L, 200L).negate().get();
assertExpected(sarg, expected);
}