This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 9882eb7c62 [common] Minor fix for range-bitmap (#6033)
9882eb7c62 is described below
commit 9882eb7c62c449501faa9ec7d7350c94dd01de57
Author: Tan-JiaLiang <[email protected]>
AuthorDate: Wed Aug 6 19:53:12 2025 +0800
[common] Minor fix for range-bitmap (#6033)
---
.../fileindex/rangebitmap/BitSliceIndexBitmap.java | 3 +-
.../paimon/fileindex/rangebitmap/RangeBitmap.java | 90 ++++++++++++++++------
.../dictionary/chunked/ChunkedDictionary.java | 4 +-
.../rangebitmap/BitSliceIndexBitmapTest.java | 36 +++++++++
.../rangebitmap/ChunkedDictionaryTest.java | 13 ++++
.../rangebitmap/RangeBitmapFileIndexTest.java | 56 ++++++++++++++
6 files changed, 177 insertions(+), 25 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
index b973c735cb..0708b43774 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
@@ -236,7 +236,8 @@ public class BitSliceIndexBitmap {
this.min = min;
this.max = max;
this.ebm = new RoaringBitmap32();
- this.slices = new RoaringBitmap32[Long.SIZE -
Long.numberOfLeadingZeros(max)];
+ this.slices =
+ new RoaringBitmap32[Math.max(Long.SIZE -
Long.numberOfLeadingZeros(max), 1)];
for (int i = 0; i < slices.length; i++) {
slices[i] = new RoaringBitmap32();
}
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
index 08aaeefe82..f8d0ceba68 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
@@ -25,6 +25,8 @@ import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.utils.IOUtils;
import org.apache.paimon.utils.RoaringBitmap32;
+import javax.annotation.Nullable;
+
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Comparator;
@@ -40,8 +42,9 @@ public class RangeBitmap {
public static final byte CURRENT_VERSION = VERSION_1;
private final int rid;
- private final Object min;
- private final Object max;
+ @Nullable private final Object min;
+ @Nullable private final Object max;
+ private final int cardinality;
private final int dictionaryOffset;
private final int bsiOffset;
@@ -74,10 +77,9 @@ public class RangeBitmap {
throw new RuntimeException("Invalid version " + version);
}
this.rid = headers.getInt();
- // ignore cardinality
- headers.getInt();
- this.min = deserializer.deserialize(headers);
- this.max = deserializer.deserialize(headers);
+ this.cardinality = headers.getInt();
+ this.min = cardinality <= 0 ? null : deserializer.deserialize(headers);
+ this.max = cardinality <= 0 ? null : deserializer.deserialize(headers);
int dictionaryLength = headers.getInt();
this.dictionaryOffset = offset + Integer.BYTES + headerLength;
@@ -89,6 +91,10 @@ public class RangeBitmap {
}
public RoaringBitmap32 eq(Object key) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
int compareMin = comparator.compare(key, min);
int compareMax = comparator.compare(key, max);
if (compareMin == 0 && compareMax == 0) {
@@ -105,10 +111,17 @@ public class RangeBitmap {
}
public RoaringBitmap32 neq(Object key) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
return not(eq(key));
}
public RoaringBitmap32 lte(Object key) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
int compareMin = comparator.compare(key, min);
int compareMax = comparator.compare(key, max);
if (compareMax >= 0) {
@@ -121,6 +134,10 @@ public class RangeBitmap {
}
public RoaringBitmap32 lt(Object key) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
int compareMin = comparator.compare(key, min);
int compareMax = comparator.compare(key, max);
if (compareMax > 0) {
@@ -133,6 +150,10 @@ public class RangeBitmap {
}
public RoaringBitmap32 gte(Object key) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
int compareMin = comparator.compare(key, min);
int compareMax = comparator.compare(key, max);
if (compareMin <= 0) {
@@ -148,6 +169,10 @@ public class RangeBitmap {
}
public RoaringBitmap32 gt(Object key) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
int compareMin = comparator.compare(key, min);
int compareMax = comparator.compare(key, max);
if (compareMin < 0) {
@@ -163,6 +188,10 @@ public class RangeBitmap {
}
public RoaringBitmap32 in(List<Object> keys) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
RoaringBitmap32 bitmap = new RoaringBitmap32();
for (Object key : keys) {
bitmap.or(eq(key));
@@ -171,16 +200,28 @@ public class RangeBitmap {
}
public RoaringBitmap32 notIn(List<Object> keys) {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
return not(in(keys));
}
public RoaringBitmap32 isNull() {
+ if (cardinality <= 0) {
+ return rid > 0 ? RoaringBitmap32.bitmapOf(0, rid - 1) : new
RoaringBitmap32();
+ }
+
RoaringBitmap32 bitmap = isNotNull();
bitmap.flip(0, rid);
return bitmap;
}
public RoaringBitmap32 isNotNull() {
+ if (cardinality <= 0) {
+ return new RoaringBitmap32();
+ }
+
return getBitSliceIndexBitmap().isNotNull();
}
@@ -228,14 +269,14 @@ public class RangeBitmap {
private int rid;
private final TreeMap<Object, RoaringBitmap32> bitmaps;
- private final Dictionary.Appender appender;
- private final KeyFactory.KeySerializer serializer;
+ private final KeyFactory factory;
+ private final int limitedSerializedSizeInBytes;
public Appender(KeyFactory factory, int limitedSerializedSizeInBytes) {
this.rid = 0;
this.bitmaps = new TreeMap<>(factory.createComparator());
- this.appender = new ChunkedDictionary.Appender(factory,
limitedSerializedSizeInBytes);
- this.serializer = factory.createSerializer();
+ this.factory = factory;
+ this.limitedSerializedSizeInBytes = limitedSerializedSizeInBytes;
}
public void append(Object key) {
@@ -246,19 +287,17 @@ public class RangeBitmap {
}
public byte[] serialize() {
- if (rid == 0) {
- return new byte[] {};
- }
-
int code = 0;
BitSliceIndexBitmap.Appender bsi =
new BitSliceIndexBitmap.Appender(0, bitmaps.size() - 1);
+ ChunkedDictionary.Appender dictionary =
+ new ChunkedDictionary.Appender(factory,
limitedSerializedSizeInBytes);
for (Map.Entry<Object, RoaringBitmap32> entry :
bitmaps.entrySet()) {
Object key = entry.getKey();
RoaringBitmap32 bitmap = entry.getValue();
// build the dictionary
- appender.sortedAppend(key, code);
+ dictionary.sortedAppend(key, code);
// build the relationship between position and code by the bsi
Iterator<Integer> iterator = bitmap.iterator();
@@ -269,20 +308,23 @@ public class RangeBitmap {
code++;
}
+ // serializer
+ KeyFactory.KeySerializer serializer = factory.createSerializer();
+
// min & max
- Object min = bitmaps.firstKey();
- Object max = bitmaps.lastKey();
+ Object min = bitmaps.isEmpty() ? null : bitmaps.firstKey();
+ Object max = bitmaps.isEmpty() ? null : bitmaps.lastKey();
int headerSize = 0;
headerSize += Byte.BYTES; // version
headerSize += Integer.BYTES; // rid
headerSize += Integer.BYTES; // cardinality
- headerSize += serializer.serializedSizeInBytes(min); // min
- headerSize += serializer.serializedSizeInBytes(max); // max
+ headerSize += min == null ? 0 :
serializer.serializedSizeInBytes(min); // min
+ headerSize += max == null ? 0 :
serializer.serializedSizeInBytes(max); // max
headerSize += Integer.BYTES; // dictionary length
// dictionary
- byte[] dictionarySerializeInBytes = appender.serialize();
+ byte[] dictionarySerializeInBytes = dictionary.serialize();
int dictionaryLength = dictionarySerializeInBytes.length;
// bsi
@@ -298,8 +340,12 @@ public class RangeBitmap {
buffer.put(CURRENT_VERSION);
buffer.putInt(rid);
buffer.putInt(bitmaps.size());
- serializer.serialize(buffer, min);
- serializer.serialize(buffer, max);
+ if (min != null) {
+ serializer.serialize(buffer, min);
+ }
+ if (max != null) {
+ serializer.serialize(buffer, max);
+ }
buffer.putInt(dictionaryLength);
// write dictionary
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
index 8e4cd120fb..6a719fdad0 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/dictionary/chunked/ChunkedDictionary.java
@@ -193,13 +193,13 @@ public class ChunkedDictionary implements Dictionary {
throw new IllegalArgumentException("key should not be null");
}
- if (this.key != null && comparator.compare(this.key, key) > 0) {
+ if (this.key != null && comparator.compare(this.key, key) >= 0) {
throw new IllegalArgumentException("key can not out of order");
} else {
this.key = key;
}
- if (this.code != null && this.code.compareTo(code) > 0) {
+ if (this.code != null && this.code.compareTo(code) >= 0) {
throw new IllegalArgumentException("code can not out of
order");
} else {
this.code = code;
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
index a1d2faa4d4..1a52a05db7 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
@@ -23,6 +23,7 @@ import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.RoaringBitmap32;
import org.junit.jupiter.api.RepeatedTest;
+import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -108,4 +109,39 @@ public class BitSliceIndexBitmapTest {
assertThat(bsi.get(pair.getKey())).isEqualTo(pair.getValue());
}
}
+
+ @Test
+ public void testSingle() throws IOException {
+ int size = 5;
+ BitSliceIndexBitmap.Appender appender = new
BitSliceIndexBitmap.Appender(0, 0);
+ for (int i = 0; i < size; i++) {
+ appender.append(i, 0);
+ }
+
+ ByteBuffer serialize = appender.serialize();
+ ByteArraySeekableStream in = new
ByteArraySeekableStream(serialize.array());
+ BitSliceIndexBitmap bsi = new BitSliceIndexBitmap(in, 0);
+
+ assertThat(bsi.isNotNull()).isEqualTo(RoaringBitmap32.bitmapOf(0, 1,
2, 3, 4));
+ assertThat(bsi.eq(0)).isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3,
4));
+ assertThat(bsi.gt(0)).isEqualTo(RoaringBitmap32.bitmapOf());
+ assertThat(bsi.gte(0)).isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3,
4));
+ for (int i = 0; i < 5; i++) {
+ assertThat(bsi.get(i)).isEqualTo(0);
+ }
+ }
+
+ @Test
+ public void testBuildEmpty() throws IOException {
+ BitSliceIndexBitmap.Appender appender = new
BitSliceIndexBitmap.Appender(0, 0);
+
+ ByteBuffer serialize = appender.serialize();
+ ByteArraySeekableStream in = new
ByteArraySeekableStream(serialize.array());
+ BitSliceIndexBitmap bsi = new BitSliceIndexBitmap(in, 0);
+
+ assertThat(bsi.isNotNull()).isEqualTo(RoaringBitmap32.bitmapOf());
+ assertThat(bsi.eq(0)).isEqualTo(RoaringBitmap32.bitmapOf());
+ assertThat(bsi.gt(0)).isEqualTo(RoaringBitmap32.bitmapOf());
+ assertThat(bsi.gte(0)).isEqualTo(RoaringBitmap32.bitmapOf());
+ }
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
index f6736d4875..e26ab9d673 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/ChunkedDictionaryTest.java
@@ -24,6 +24,7 @@ import
org.apache.paimon.fileindex.rangebitmap.dictionary.chunked.KeyFactory;
import org.apache.paimon.fs.ByteArraySeekableStream;
import org.junit.jupiter.api.RepeatedTest;
+import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.ArrayList;
@@ -54,6 +55,18 @@ public class ChunkedDictionaryTest {
testVariableLengthChunkedDictionary(512); // test chunked
}
+ @Test
+ public void testBuildEmpty() throws IOException {
+ KeyFactory.IntKeyFactory factory = new KeyFactory.IntKeyFactory();
+ ChunkedDictionary.Appender appender = new
ChunkedDictionary.Appender(factory, 0);
+ ChunkedDictionary dictionary =
+ new ChunkedDictionary(
+ new ByteArraySeekableStream(appender.serialize()), 0,
factory);
+ assertThat(dictionary.find(new Integer(0))).isEqualTo(-1);
+ assertThat(dictionary.find(new Integer(1))).isEqualTo(-1);
+ assertThat(dictionary.find(new Integer(3))).isEqualTo(-1);
+ }
+
private void testFixedLengthChunkedDictionary(int limitedSize) throws
IOException {
Random random = new Random();
Set<Integer> set = new HashSet<>();
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
index 890f83f16f..372516de94 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
@@ -281,4 +281,60 @@ public class RangeBitmapFileIndexTest {
assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
9)).get())
.isEqualTo(RoaringBitmap32.bitmapOf(4));
}
+
+ @Test
+ public void testOnlyNulls() {
+ IntType intType = new IntType();
+ FieldRef fieldRef = new FieldRef(0, "", intType);
+ RangeBitmapFileIndex bitmapFileIndex = new
RangeBitmapFileIndex(intType, new Options());
+ FileIndexWriter writer = bitmapFileIndex.createWriter();
+ writer.writeRecord(null);
+ writer.writeRecord(null);
+
+ // build index
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bitmapFileIndex.createReader(stream, 0,
bytes.length);
+
+ // test is null
+ assertThat(((BitmapIndexResult) reader.visitIsNull(fieldRef)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1));
+ // test is not null
+ assertThat(((BitmapIndexResult) reader.visitIsNotNull(fieldRef)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+
+ // test EQ
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 0)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ // test GT
+ assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
0)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ }
+
+ @Test
+ public void testBuildEmpty() {
+ IntType intType = new IntType();
+ FieldRef fieldRef = new FieldRef(0, "", intType);
+ RangeBitmapFileIndex bitmapFileIndex = new
RangeBitmapFileIndex(intType, new Options());
+ FileIndexWriter writer = bitmapFileIndex.createWriter();
+
+ // build index
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bitmapFileIndex.createReader(stream, 0,
bytes.length);
+
+ // test is null
+ assertThat(((BitmapIndexResult) reader.visitIsNull(fieldRef)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ // test is not null
+ assertThat(((BitmapIndexResult) reader.visitIsNotNull(fieldRef)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+
+ // test EQ
+ assertThat(((BitmapIndexResult) reader.visitEqual(fieldRef, 0)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ // test GT
+ assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
0)).get())
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+ }
}