This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 961d4447dc [common] Correct the range-bitmap's dictionary return 
results (#5980)
961d4447dc is described below

commit 961d4447dc37a1b0f80f43d907f8144e12f750d8
Author: Tan-JiaLiang <[email protected]>
AuthorDate: Tue Jul 29 21:11:20 2025 +0800

    [common] Correct the range-bitmap's dictionary return results (#5980)
---
 .../paimon/fileindex/rangebitmap/RangeBitmap.java  |  8 +++-
 .../dictionary/chunked/AbstractChunk.java          |  8 +++-
 .../dictionary/chunked/ChunkedDictionary.java      |  7 +++
 .../rangebitmap/ChunkedDictionaryTest.java         | 31 ++++++++----
 .../rangebitmap/RangeBitmapFileIndexTest.java      | 56 +++++++++++++++++++++-
 5 files changed, 96 insertions(+), 14 deletions(-)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
index c716ea26a3..08aaeefe82 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
@@ -142,7 +142,9 @@ public class RangeBitmap {
         }
 
         int code = getDictionary().find(key);
-        return code < 0 ? getBitSliceIndexBitmap().gte(-code) : 
getBitSliceIndexBitmap().gte(code);
+        return code < 0
+                ? getBitSliceIndexBitmap().gte(-code - 1)
+                : getBitSliceIndexBitmap().gte(code);
     }
 
     public RoaringBitmap32 gt(Object key) {
@@ -155,7 +157,9 @@ public class RangeBitmap {
         }
 
         int code = getDictionary().find(key);
-        return code < 0 ? getBitSliceIndexBitmap().gte(-code) : 
getBitSliceIndexBitmap().gt(code);
+        return code < 0
+                ? getBitSliceIndexBitmap().gte(-code - 1)
+                : getBitSliceIndexBitmap().gt(code);
     }
 
     public RoaringBitmap32 in(List<Object> keys) {
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
index f76c0aa75a..0148e9e375 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/AbstractChunk.java
@@ -36,6 +36,7 @@ public abstract class AbstractChunk implements Chunk {
         }
         int low = 0;
         int high = size() - 1;
+        int base = code() + 1;
         while (low <= high) {
             int mid = (low + high) >>> 1;
             int result = comparator.compare(get(mid), key);
@@ -44,10 +45,10 @@ public abstract class AbstractChunk implements Chunk {
             } else if (result < 0) {
                 low = mid + 1;
             } else {
-                return code() + mid + 1;
+                return base + mid;
             }
         }
-        return -(code() + low + 1);
+        return -(base + low + 1);
     }
 
     @Override
@@ -57,6 +58,9 @@ public abstract class AbstractChunk implements Chunk {
             return key();
         }
         int index = code - current - 1;
+        if (index < 0 || index >= size()) {
+            throw new IndexOutOfBoundsException("invalid code: " + code);
+        }
         return get(index);
     }
 
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
index cb6b695a01..8e4cd120fb 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
@@ -87,11 +87,18 @@ public class ChunkedDictionary implements Dictionary {
                 return found.code();
             }
         }
+        // key not found
+        if (low == 0) {
+            return -(low + 1);
+        }
         return get(low - 1).find(key);
     }
 
     @Override
     public Object find(int code) {
+        if (code < 0) {
+            throw new IndexOutOfBoundsException("invalid code: " + code);
+        }
         int low = 0;
         int high = size - 1;
         while (low <= high) {
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
 
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
index 41d6db49d7..f6736d4875 100644
--- 
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
+++ 
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
@@ -37,6 +37,7 @@ import java.util.UUID;
 import java.util.stream.Collectors;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 /** Test case for {@link ChunkedDictionary}. */
 public class ChunkedDictionaryTest {
@@ -80,19 +81,26 @@ public class ChunkedDictionaryTest {
             Integer value = expected.get(i);
 
             // find code by key
-            assertThat(dictionary.find(value)).isEqualTo(i);
+            int result = dictionary.find(value);
+            assertThat(result).isEqualTo(i);
+            assertThat(result).isEqualTo(Collections.binarySearch(expected, 
value));
 
             // find key by code
             assertThat(dictionary.find(i)).isEqualTo(value);
         }
 
+        // find key by code out of range
+        assertThatThrownBy(() -> 
dictionary.find(-1)).isInstanceOf(IndexOutOfBoundsException.class);
+        assertThatThrownBy(() -> dictionary.find(expected.size()))
+                .isInstanceOf(IndexOutOfBoundsException.class);
+
         // not exists
         for (int i = 0; i < 10; i++) {
             Integer value = random.nextInt(BOUND) + BOUND;
             // find code by key
-            assertThat(dictionary.find(value)).isNegative();
-            assertThat(dictionary.find(value))
-                    .isEqualTo(Collections.binarySearch(expected, value) + 1);
+            int result = dictionary.find(value);
+            assertThat(result).isNegative();
+            assertThat(result).isEqualTo(Collections.binarySearch(expected, 
value));
         }
     }
 
@@ -116,19 +124,26 @@ public class ChunkedDictionaryTest {
             BinaryString value = expected.get(i);
 
             // find code by key
-            assertThat(dictionary.find(value)).isEqualTo(i);
+            int result = dictionary.find(value);
+            assertThat(result).isEqualTo(i);
+            assertThat(result).isEqualTo(Collections.binarySearch(expected, 
value));
 
             // find key by code
             assertThat(dictionary.find(i)).isEqualTo(value);
         }
 
+        // find key by code out of range
+        assertThatThrownBy(() -> 
dictionary.find(-1)).isInstanceOf(IndexOutOfBoundsException.class);
+        assertThatThrownBy(() -> dictionary.find(expected.size()))
+                .isInstanceOf(IndexOutOfBoundsException.class);
+
         // not exists
         for (int i = 0; i < 10; i++) {
             BinaryString value = 
BinaryString.fromString(UUID.randomUUID().toString() + i);
             // find code by key
-            assertThat(dictionary.find(value)).isNegative();
-            assertThat(dictionary.find(value))
-                    .isEqualTo(Collections.binarySearch(expected, value) + 1);
+            int result = dictionary.find(value);
+            assertThat(result).isNegative();
+            assertThat(result).isEqualTo(Collections.binarySearch(expected, 
value));
         }
     }
 }
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
 
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
index a2c385b2e9..890f83f16f 100644
--- 
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
+++ 
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
@@ -25,11 +25,13 @@ import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
 import org.apache.paimon.fs.ByteArraySeekableStream;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.types.IntType;
 import org.apache.paimon.types.VarCharType;
 import org.apache.paimon.utils.Pair;
 import org.apache.paimon.utils.RoaringBitmap32;
 
 import org.junit.jupiter.api.RepeatedTest;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -44,9 +46,9 @@ import static org.assertj.core.api.Assertions.assertThat;
 public class RangeBitmapFileIndexTest {
 
     private static final int ROW_COUNT = 10000;
-    private static final int BOUND = 100;
+    private static final int BOUND = 1000000;
 
-    @RepeatedTest(5)
+    @RepeatedTest(10)
     public void test() {
         String prefix = "hello-";
         VarCharType varCharType = new VarCharType();
@@ -229,4 +231,54 @@ public class RangeBitmapFileIndexTest {
                     .isEqualTo(bitmap);
         }
     }
+
+    @Test
+    public void testSimple() {
+        IntType intType = new IntType();
+        FieldRef fieldRef = new FieldRef(0, "", intType);
+        RangeBitmapFileIndex bitmapFileIndex = new 
RangeBitmapFileIndex(intType, new Options());
+        FileIndexWriter writer = bitmapFileIndex.createWriter();
+        writer.writeRecord(1);
+        writer.writeRecord(3);
+        writer.writeRecord(5);
+        writer.writeRecord(7);
+        writer.writeRecord(9);
+
+        // build index
+        byte[] bytes = writer.serializedBytes();
+        ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+        FileIndexReader reader = bitmapFileIndex.createReader(stream, 0, 
bytes.length);
+
+        // test eq
+        assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 1)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(0));
+        assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 2)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf());
+        assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 3)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(1));
+        assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 4)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf());
+        assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 5)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(2));
+
+        // test gt
+        assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef, 
0)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4));
+        assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef, 
1)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(1, 2, 3, 4));
+        assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef, 
6)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(3, 4));
+        assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef, 
9)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf());
+
+        // test gte
+        assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef, 
0)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4));
+        assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef, 
1)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4));
+        assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef, 
6)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(3, 4));
+        assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef, 
9)).get())
+                .isEqualTo(RoaringBitmap32.bitmapOf(4));
+    }
 }

Reply via email to