Tan-JiaLiang commented on code in PR #6178: URL: https://github.com/apache/paimon/pull/6178#discussion_r2312850062
########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java: ########## @@ -153,6 +153,11 @@ public RoaringBitmap32 gte(int code) { } public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) { + return topK(k, foundSet, false); + } + + public RoaringBitmap32 topK( + int k, @Nullable RoaringBitmap32 foundSet, boolean allowDuplicates) { Review Comment: What about `allowDuplicates` => `strict`? If strict is true, limit it. Otherwise will include all the same last value. ########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java: ########## @@ -178,24 +183,34 @@ public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) { e = RoaringBitmap32.andNot(e, getSlice(i)); } else { e = RoaringBitmap32.and(e, getSlice(i)); - break; + if (!allowDuplicates) { + break; + } + // If allowDuplicates, continue to include all rows with the same value } } - // only k results should be returned + // Return all results if allowDuplicates, otherwise limit to k RoaringBitmap32 f = RoaringBitmap32.or(g, e); - long n = f.getCardinality() - k; - if (n > 0) { - Iterator<Integer> iterator = e.iterator(); - while (iterator.hasNext() && n > 0) { - f.remove(iterator.next()); - n--; + if (!allowDuplicates) { + long n = f.getCardinality() - k; + if (n > 0) { + Iterator<Integer> iterator = e.iterator(); + while (iterator.hasNext() && n > 0) { + f.remove(iterator.next()); + n--; + } } } return f; } public RoaringBitmap32 bottomK(int k, @Nullable RoaringBitmap32 foundSet) { + return bottomK(k, foundSet, false); + } + + public RoaringBitmap32 bottomK( + int k, @Nullable RoaringBitmap32 foundSet, boolean allowDuplicates) { Review Comment: Same the topK. ########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java: ########## @@ -153,6 +153,11 @@ public RoaringBitmap32 gte(int code) { } public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) { + return topK(k, foundSet, false); Review Comment: This method is no longer necessary. ########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndex.java: ########## @@ -156,20 +156,24 @@ public FileIndexResult visitGreaterOrEqual(FieldRef fieldRef, Object literal) { @Override public FileIndexResult visitTopN(TopN topN, FileIndexResult result) { List<SortValue> orders = topN.orders(); - if (orders.size() != 1) { - return FileIndexResult.REMAIN; - } + + // If multiple columns, use first column with allowDuplicates=true + boolean useFirstColumn = orders.size() > 1; + SortValue sort = orders.get(0); // Always use first column + boolean allowDuplicates = useFirstColumn; Review Comment: Just `allowDuplicates` => `strict` ########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java: ########## @@ -281,13 +281,36 @@ private BitSliceIndexBitmap getBitSliceIndexBitmap() { public RoaringBitmap32 topK( int k, SortValue.NullOrdering nullOrdering, @Nullable RoaringBitmap32 foundSet) { - return fillNulls(k, nullOrdering, foundSet, (l, r) -> getBitSliceIndexBitmap().topK(l, r)); + return topK(k, nullOrdering, foundSet, false); Review Comment: This method is no longer necessary. ########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java: ########## @@ -281,13 +281,36 @@ private BitSliceIndexBitmap getBitSliceIndexBitmap() { public RoaringBitmap32 topK( int k, SortValue.NullOrdering nullOrdering, @Nullable RoaringBitmap32 foundSet) { - return fillNulls(k, nullOrdering, foundSet, (l, r) -> getBitSliceIndexBitmap().topK(l, r)); + return topK(k, nullOrdering, foundSet, false); + } + + public RoaringBitmap32 topK( + int k, + SortValue.NullOrdering nullOrdering, + @Nullable RoaringBitmap32 foundSet, + boolean allowDuplicates) { + return fillNulls( + k, + nullOrdering, + foundSet, + (l, r) -> getBitSliceIndexBitmap().topK(l, r, allowDuplicates)); } public RoaringBitmap32 bottomK( int k, SortValue.NullOrdering nullOrdering, @Nullable RoaringBitmap32 foundSet) { + return bottomK(k, nullOrdering, foundSet, false); Review Comment: Same as topk. ########## paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java: ########## @@ -178,24 +183,34 @@ public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) { e = RoaringBitmap32.andNot(e, getSlice(i)); } else { e = RoaringBitmap32.and(e, getSlice(i)); - break; + if (!allowDuplicates) { + break; + } + // If allowDuplicates, continue to include all rows with the same value } } - // only k results should be returned + // Return all results if allowDuplicates, otherwise limit to k RoaringBitmap32 f = RoaringBitmap32.or(g, e); - long n = f.getCardinality() - k; - if (n > 0) { - Iterator<Integer> iterator = e.iterator(); - while (iterator.hasNext() && n > 0) { - f.remove(iterator.next()); - n--; + if (!allowDuplicates) { + long n = f.getCardinality() - k; + if (n > 0) { + Iterator<Integer> iterator = e.iterator(); + while (iterator.hasNext() && n > 0) { + f.remove(iterator.next()); + n--; + } } } return f; } public RoaringBitmap32 bottomK(int k, @Nullable RoaringBitmap32 foundSet) { + return bottomK(k, foundSet, false); Review Comment: Same as topk. ########## paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java: ########## @@ -34,13 +36,7 @@ import org.junit.jupiter.api.RepeatedTest; import org.junit.jupiter.api.Test; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashSet; -import java.util.List; -import java.util.Objects; -import java.util.Random; -import java.util.Set; +import java.util.*; Review Comment: Keep import as needed instead of import *. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org