This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 961d4447dc [common] Correct the range-bitmap's dictionary return
results (#5980)
961d4447dc is described below
commit 961d4447dc37a1b0f80f43d907f8144e12f750d8
Author: Tan-JiaLiang <[email protected]>
AuthorDate: Tue Jul 29 21:11:20 2025 +0800
[common] Correct the range-bitmap's dictionary return results (#5980)
---
.../paimon/fileindex/rangebitmap/RangeBitmap.java | 8 +++-
.../dictionary/chunked/AbstractChunk.java | 8 +++-
.../dictionary/chunked/ChunkedDictionary.java | 7 +++
.../rangebitmap/ChunkedDictionaryTest.java | 31 ++++++++----
.../rangebitmap/RangeBitmapFileIndexTest.java | 56 +++++++++++++++++++++-
5 files changed, 96 insertions(+), 14 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
index c716ea26a3..08aaeefe82 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
@@ -142,7 +142,9 @@ public class RangeBitmap {
}
int code = getDictionary().find(key);
- return code < 0 ? getBitSliceIndexBitmap().gte(-code) :
getBitSliceIndexBitmap().gte(code);
+ return code < 0
+ ? getBitSliceIndexBitmap().gte(-code - 1)
+ : getBitSliceIndexBitmap().gte(code);
}
public RoaringBitmap32 gt(Object key) {
@@ -155,7 +157,9 @@ public class RangeBitmap {
}
int code = getDictionary().find(key);
- return code < 0 ? getBitSliceIndexBitmap().gte(-code) :
getBitSliceIndexBitmap().gt(code);
+ return code < 0
+ ? getBitSliceIndexBitmap().gte(-code - 1)
+ : getBitSliceIndexBitmap().gt(code);
}
public RoaringBitmap32 in(List<Object> keys) {
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
index f76c0aa75a..0148e9e375 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
@@ -36,6 +36,7 @@ public abstract class AbstractChunk implements Chunk {
}
int low = 0;
int high = size() - 1;
+ int base = code() + 1;
while (low <= high) {
int mid = (low + high) >>> 1;
int result = comparator.compare(get(mid), key);
@@ -44,10 +45,10 @@ public abstract class AbstractChunk implements Chunk {
} else if (result < 0) {
low = mid + 1;
} else {
- return code() + mid + 1;
+ return base + mid;
}
}
- return -(code() + low + 1);
+ return -(base + low + 1);
}
@Override
@@ -57,6 +58,9 @@ public abstract class AbstractChunk implements Chunk {
return key();
}
int index = code - current - 1;
+ if (index < 0 || index >= size()) {
+ throw new IndexOutOfBoundsException("invalid code: " + code);
+ }
return get(index);
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
index cb6b695a01..8e4cd120fb 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
@@ -87,11 +87,18 @@ public class ChunkedDictionary implements Dictionary {
return found.code();
}
}
+ // key not found
+ if (low == 0) {
+ return -(low + 1);
+ }
return get(low - 1).find(key);
}
@Override
public Object find(int code) {
+ if (code < 0) {
+ throw new IndexOutOfBoundsException("invalid code: " + code);
+ }
int low = 0;
int high = size - 1;
while (low <= high) {
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
index 41d6db49d7..f6736d4875 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
@@ -37,6 +37,7 @@ import java.util.UUID;
import java.util.stream.Collectors;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
/** Test case for {@link ChunkedDictionary}. */
public class ChunkedDictionaryTest {
@@ -80,19 +81,26 @@ public class ChunkedDictionaryTest {
Integer value = expected.get(i);
// find code by key
- assertThat(dictionary.find(value)).isEqualTo(i);
+ int result = dictionary.find(value);
+ assertThat(result).isEqualTo(i);
+ assertThat(result).isEqualTo(Collections.binarySearch(expected,
value));
// find key by code
assertThat(dictionary.find(i)).isEqualTo(value);
}
+ // find key by code out of range
+ assertThatThrownBy(() ->
dictionary.find(-1)).isInstanceOf(IndexOutOfBoundsException.class);
+ assertThatThrownBy(() -> dictionary.find(expected.size()))
+ .isInstanceOf(IndexOutOfBoundsException.class);
+
// not exists
for (int i = 0; i < 10; i++) {
Integer value = random.nextInt(BOUND) + BOUND;
// find code by key
- assertThat(dictionary.find(value)).isNegative();
- assertThat(dictionary.find(value))
- .isEqualTo(Collections.binarySearch(expected, value) + 1);
+ int result = dictionary.find(value);
+ assertThat(result).isNegative();
+ assertThat(result).isEqualTo(Collections.binarySearch(expected,
value));
}
}
@@ -116,19 +124,26 @@ public class ChunkedDictionaryTest {
BinaryString value = expected.get(i);
// find code by key
- assertThat(dictionary.find(value)).isEqualTo(i);
+ int result = dictionary.find(value);
+ assertThat(result).isEqualTo(i);
+ assertThat(result).isEqualTo(Collections.binarySearch(expected,
value));
// find key by code
assertThat(dictionary.find(i)).isEqualTo(value);
}
+ // find key by code out of range
+ assertThatThrownBy(() ->
dictionary.find(-1)).isInstanceOf(IndexOutOfBoundsException.class);
+ assertThatThrownBy(() -> dictionary.find(expected.size()))
+ .isInstanceOf(IndexOutOfBoundsException.class);
+
// not exists
for (int i = 0; i < 10; i++) {
BinaryString value =
BinaryString.fromString(UUID.randomUUID().toString() + i);
// find code by key
- assertThat(dictionary.find(value)).isNegative();
- assertThat(dictionary.find(value))
- .isEqualTo(Collections.binarySearch(expected, value) + 1);
+ int result = dictionary.find(value);
+ assertThat(result).isNegative();
+ assertThat(result).isEqualTo(Collections.binarySearch(expected,
value));
}
}
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
index a2c385b2e9..890f83f16f 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
@@ -25,11 +25,13 @@ import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
import org.apache.paimon.fs.ByteArraySeekableStream;
import org.apache.paimon.options.Options;
import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.types.IntType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.RoaringBitmap32;
import org.junit.jupiter.api.RepeatedTest;
+import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.HashSet;
@@ -44,9 +46,9 @@ import static org.assertj.core.api.Assertions.assertThat;
public class RangeBitmapFileIndexTest {
private static final int ROW_COUNT = 10000;
- private static final int BOUND = 100;
+ private static final int BOUND = 1000000;
- @RepeatedTest(5)
+ @RepeatedTest(10)
public void test() {
String prefix = "hello-";
VarCharType varCharType = new VarCharType();
@@ -229,4 +231,54 @@ public class RangeBitmapFileIndexTest {
.isEqualTo(bitmap);
}
}
+
+ @Test
+ public void testSimple() {
+ IntType intType = new IntType();
+ FieldRef fieldRef = new FieldRef(0, "", intType);
+ RangeBitmapFileIndex bitmapFileIndex = new
RangeBitmapFileIndex(intType, new Options());
+ FileIndexWriter writer = bitmapFileIndex.createWriter();
+ writer.writeRecord(1);
+ writer.writeRecord(3);
+ writer.writeRecord(5);
+ writer.writeRecord(7);
+ writer.writeRecord(9);
+
+ // build index
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bitmapFileIndex.createReader(stream, 0,
bytes.length);
+
+ // test eq
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 1)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(0));
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 2)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 3)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(1));
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 4)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 5)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(2));
+
+ // test gt
+ assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
0)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4));
+ assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
1)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(1, 2, 3, 4));
+ assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
6)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(3, 4));
+ assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
9)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+
+ // test gte
+ assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
0)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4));
+ assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
1)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4));
+ assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
6)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(3, 4));
+ assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
9)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(4));
+ }
}