This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 14ea82d5a5 [core] Support multi ordering TopN with allow duplicate in
bitmap (#6178)
14ea82d5a5 is described below
commit 14ea82d5a5f42dce139979cf74e3f2c683944a2a
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon Sep 8 10:41:29 2025 +0530
[core] Support multi ordering TopN with allow duplicate in bitmap (#6178)
---
.../fileindex/rangebitmap/BitSliceIndexBitmap.java | 14 +++-
.../paimon/fileindex/rangebitmap/RangeBitmap.java | 18 +++-
.../rangebitmap/RangeBitmapFileIndex.java | 15 ++--
.../rangebitmap/BitSliceIndexBitmapTest.java | 8 +-
.../rangebitmap/RangeBitmapFileIndexTest.java | 98 ++++++++++++++++++++++
5 files changed, 135 insertions(+), 18 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
index 978d4635c9..6af3c6b34c 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java
@@ -152,7 +152,7 @@ public class BitSliceIndexBitmap {
return gt(code - 1);
}
- public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) {
+ public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet,
boolean strict) {
if (k == 0 || (foundSet != null && foundSet.isEmpty())) {
return new RoaringBitmap32();
}
@@ -182,8 +182,11 @@ public class BitSliceIndexBitmap {
}
}
- // only k results should be returned
RoaringBitmap32 f = RoaringBitmap32.or(g, e);
+ if (!strict) {
+ return f;
+ }
+
long n = f.getCardinality() - k;
if (n > 0) {
Iterator<Integer> iterator = e.iterator();
@@ -195,7 +198,7 @@ public class BitSliceIndexBitmap {
return f;
}
- public RoaringBitmap32 bottomK(int k, @Nullable RoaringBitmap32 foundSet) {
+ public RoaringBitmap32 bottomK(int k, @Nullable RoaringBitmap32 foundSet,
boolean strict) {
if (k == 0 || (foundSet != null && foundSet.isEmpty())) {
return new RoaringBitmap32();
}
@@ -226,8 +229,11 @@ public class BitSliceIndexBitmap {
}
}
- // only k results should be returned
RoaringBitmap32 f = RoaringBitmap32.or(g, e);
+ if (!strict) {
+ return f;
+ }
+
long n = f.getCardinality() - k;
if (n > 0) {
Iterator<Integer> iterator = e.iterator();
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
index 3c1c2626b3..c338f47bfc 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java
@@ -280,14 +280,24 @@ public class RangeBitmap {
}
public RoaringBitmap32 topK(
- int k, SortValue.NullOrdering nullOrdering, @Nullable
RoaringBitmap32 foundSet) {
- return fillNulls(k, nullOrdering, foundSet, (l, r) ->
getBitSliceIndexBitmap().topK(l, r));
+ int k,
+ SortValue.NullOrdering nullOrdering,
+ @Nullable RoaringBitmap32 foundSet,
+ boolean strict) {
+ return fillNulls(
+ k, nullOrdering, foundSet, (l, r) ->
getBitSliceIndexBitmap().topK(l, r, strict));
}
public RoaringBitmap32 bottomK(
- int k, SortValue.NullOrdering nullOrdering, @Nullable
RoaringBitmap32 foundSet) {
+ int k,
+ SortValue.NullOrdering nullOrdering,
+ @Nullable RoaringBitmap32 foundSet,
+ boolean strict) {
return fillNulls(
- k, nullOrdering, foundSet, (l, r) ->
getBitSliceIndexBitmap().bottomK(l, r));
+ k,
+ nullOrdering,
+ foundSet,
+ (l, r) -> getBitSliceIndexBitmap().bottomK(l, r, strict));
}
private RoaringBitmap32 fillNulls(
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndex.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndex.java
index 65deca4553..fe2859824a 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndex.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndex.java
@@ -156,20 +156,23 @@ public class RangeBitmapFileIndex implements FileIndexer {
@Override
public FileIndexResult visitTopN(TopN topN, FileIndexResult result) {
List<SortValue> orders = topN.orders();
- if (orders.size() != 1) {
- return FileIndexResult.REMAIN;
- }
+
+ // If multiple columns, use first column with strict=false (allow
duplicates)
+ boolean strict = orders.size() == 1;
+ SortValue sort = orders.get(0);
RoaringBitmap32 foundSet =
result instanceof BitmapIndexResult ? ((BitmapIndexResult)
result).get() : null;
int limit = topN.limit();
- SortValue sort = topN.orders().get(0);
SortValue.NullOrdering nullOrdering = sort.nullOrdering();
+
if (ASCENDING.equals(sort.direction())) {
- return new BitmapIndexResult(() -> bitmap.bottomK(limit,
nullOrdering, foundSet));
+ return new BitmapIndexResult(
+ () -> bitmap.bottomK(limit, nullOrdering, foundSet,
strict));
} else {
- return new BitmapIndexResult(() -> bitmap.topK(limit,
nullOrdering, foundSet));
+ return new BitmapIndexResult(
+ () -> bitmap.topK(limit, nullOrdering, foundSet,
strict));
}
}
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
index 997f11eeea..885b98c302 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmapTest.java
@@ -128,7 +128,7 @@ public class BitSliceIndexBitmapTest {
.map(Pair::getKey)
.limit(k)
.forEach(expected::add);
- RoaringBitmap32 actual = bsi.topK(k, null);
+ RoaringBitmap32 actual = bsi.topK(k, null, true);
assertThat(actual).isEqualTo(expected);
// with found set
@@ -151,7 +151,7 @@ public class BitSliceIndexBitmapTest {
.map(Pair::getKey)
.limit(k)
.forEach(expected::add);
- actual = bsi.topK(k, foundSet);
+ actual = bsi.topK(k, foundSet, true);
assertThat(actual).isEqualTo(expected);
}
@@ -172,7 +172,7 @@ public class BitSliceIndexBitmapTest {
.map(Pair::getKey)
.limit(k)
.forEach(expected::add);
- RoaringBitmap32 actual = bsi.bottomK(k, null);
+ RoaringBitmap32 actual = bsi.bottomK(k, null, true);
assertThat(actual).isEqualTo(expected);
// with found set
@@ -195,7 +195,7 @@ public class BitSliceIndexBitmapTest {
.map(Pair::getKey)
.limit(k)
.forEach(expected::add);
- actual = bsi.bottomK(k, foundSet);
+ actual = bsi.bottomK(k, foundSet, true);
assertThat(actual).isEqualTo(expected);
}
}
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
index 90656c2081..e56bfe7a0d 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java
@@ -20,11 +20,13 @@ package org.apache.paimon.fileindex.rangebitmap;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.fileindex.FileIndexReader;
+import org.apache.paimon.fileindex.FileIndexResult;
import org.apache.paimon.fileindex.FileIndexWriter;
import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
import org.apache.paimon.fs.ByteArraySeekableStream;
import org.apache.paimon.options.Options;
import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.predicate.SortValue;
import org.apache.paimon.predicate.TopN;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.VarCharType;
@@ -35,6 +37,7 @@ import org.junit.jupiter.api.RepeatedTest;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
@@ -469,4 +472,99 @@ public class RangeBitmapFileIndexTest {
assertThat(((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
0)).get())
.isEqualTo(RoaringBitmap32.bitmapOf());
}
+
+ @Test
+ public void testTopNWithMultipleColumns() {
+ IntType intType = new IntType();
+ FieldRef fieldRef1 = new FieldRef(0, "col1", intType);
+ FieldRef fieldRef2 = new FieldRef(1, "col2", intType);
+
+ RangeBitmapFileIndex bitmapFileIndex = new
RangeBitmapFileIndex(intType, new Options());
+ FileIndexWriter writer = bitmapFileIndex.createWriter();
+
+ writer.writeRecord(10);
+ writer.writeRecord(20);
+ writer.writeRecord(30);
+ writer.writeRecord(5);
+ writer.writeRecord(15);
+
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bitmapFileIndex.createReader(stream, 0,
bytes.length);
+
+ List<SortValue> orders =
+ Arrays.asList(
+ new SortValue(fieldRef1, ASCENDING, NULLS_LAST),
+ new SortValue(fieldRef2, DESCENDING, NULLS_LAST));
+ TopN topN = new TopN(orders, 3);
+
+ RoaringBitmap32 foundSet = RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4);
+ FileIndexResult result = reader.visitTopN(topN, new
BitmapIndexResult(() -> foundSet));
+
+ assertThat(result).isInstanceOf(BitmapIndexResult.class);
+ RoaringBitmap32 actual = ((BitmapIndexResult) result).get();
+ assertThat(actual).isEqualTo(RoaringBitmap32.bitmapOf(3, 0, 4)); //
values: 5, 10, 15
+ }
+
+ @Test
+ public void testAllowDuplicatesAscBoundary() {
+ IntType intType = new IntType();
+ FieldRef fieldRef1 = new FieldRef(0, "col1", intType);
+ FieldRef fieldRef2 = new FieldRef(1, "col2", intType);
+
+ RangeBitmapFileIndex bitmapFileIndex = new
RangeBitmapFileIndex(intType, new Options());
+ FileIndexWriter writer = bitmapFileIndex.createWriter();
+ writer.writeRecord(1);
+ writer.writeRecord(1);
+ writer.writeRecord(1);
+ writer.writeRecord(2);
+ writer.writeRecord(3);
+
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bitmapFileIndex.createReader(stream, 0,
bytes.length);
+
+ List<SortValue> orders =
+ Arrays.asList(
+ new SortValue(fieldRef1, ASCENDING, NULLS_LAST),
+ new SortValue(fieldRef2, ASCENDING, NULLS_LAST));
+ TopN topN = new TopN(orders, 2);
+
+ RoaringBitmap32 foundSet = RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4);
+ RoaringBitmap32 actual =
+ ((BitmapIndexResult) reader.visitTopN(topN, new
BitmapIndexResult(() -> foundSet)))
+ .get();
+ assertThat(actual).isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2));
+ }
+
+ @Test
+ public void testAllowDuplicatesDescBoundary() {
+ IntType intType = new IntType();
+ FieldRef fieldRef1 = new FieldRef(0, "col1", intType);
+ FieldRef fieldRef2 = new FieldRef(1, "col2", intType);
+
+ RangeBitmapFileIndex bitmapFileIndex = new
RangeBitmapFileIndex(intType, new Options());
+ FileIndexWriter writer = bitmapFileIndex.createWriter();
+ writer.writeRecord(5);
+ writer.writeRecord(4);
+ writer.writeRecord(4);
+ writer.writeRecord(4);
+ writer.writeRecord(3);
+
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bitmapFileIndex.createReader(stream, 0,
bytes.length);
+
+ List<SortValue> orders =
+ Arrays.asList(
+ new SortValue(fieldRef1, DESCENDING, NULLS_LAST),
+ new SortValue(fieldRef2, ASCENDING, NULLS_LAST));
+ TopN topN = new TopN(orders, 2);
+
+ RoaringBitmap32 foundSet = RoaringBitmap32.bitmapOf(0, 1, 2, 3, 4);
+ RoaringBitmap32 actual =
+ ((BitmapIndexResult) reader.visitTopN(topN, new
BitmapIndexResult(() -> foundSet)))
+ .get();
+ assertThat(actual).isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 2, 3));
+ }
}