Tan-JiaLiang commented on code in PR #6178:
URL: https://github.com/apache/paimon/pull/6178#discussion_r2312850062


##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java:
##########
@@ -153,6 +153,11 @@ public RoaringBitmap32 gte(int code) {
     }
 
     public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) {
+        return topK(k, foundSet, false);
+    }
+
+    public RoaringBitmap32 topK(
+            int k, @Nullable RoaringBitmap32 foundSet, boolean 
allowDuplicates) {

Review Comment:
   What about `allowDuplicates` => `strict`? 
   If strict is true, limit it.
   Otherwise will include all the same last value.



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java:
##########
@@ -178,24 +183,34 @@ public RoaringBitmap32 topK(int k, @Nullable 
RoaringBitmap32 foundSet) {
                 e = RoaringBitmap32.andNot(e, getSlice(i));
             } else {
                 e = RoaringBitmap32.and(e, getSlice(i));
-                break;
+                if (!allowDuplicates) {
+                    break;
+                }
+                // If allowDuplicates, continue to include all rows with the 
same value
             }
         }
 
-        // only k results should be returned
+        // Return all results if allowDuplicates, otherwise limit to k
         RoaringBitmap32 f = RoaringBitmap32.or(g, e);
-        long n = f.getCardinality() - k;
-        if (n > 0) {
-            Iterator<Integer> iterator = e.iterator();
-            while (iterator.hasNext() && n > 0) {
-                f.remove(iterator.next());
-                n--;
+        if (!allowDuplicates) {
+            long n = f.getCardinality() - k;
+            if (n > 0) {
+                Iterator<Integer> iterator = e.iterator();
+                while (iterator.hasNext() && n > 0) {
+                    f.remove(iterator.next());
+                    n--;
+                }
             }
         }
         return f;
     }
 
     public RoaringBitmap32 bottomK(int k, @Nullable RoaringBitmap32 foundSet) {
+        return bottomK(k, foundSet, false);
+    }
+
+    public RoaringBitmap32 bottomK(
+            int k, @Nullable RoaringBitmap32 foundSet, boolean 
allowDuplicates) {

Review Comment:
   Same the topK.



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java:
##########
@@ -153,6 +153,11 @@ public RoaringBitmap32 gte(int code) {
     }
 
     public RoaringBitmap32 topK(int k, @Nullable RoaringBitmap32 foundSet) {
+        return topK(k, foundSet, false);

Review Comment:
   This method is no longer necessary.



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndex.java:
##########
@@ -156,20 +156,24 @@ public FileIndexResult visitGreaterOrEqual(FieldRef 
fieldRef, Object literal) {
         @Override
         public FileIndexResult visitTopN(TopN topN, FileIndexResult result) {
             List<SortValue> orders = topN.orders();
-            if (orders.size() != 1) {
-                return FileIndexResult.REMAIN;
-            }
+
+            // If multiple columns, use first column with allowDuplicates=true
+            boolean useFirstColumn = orders.size() > 1;
+            SortValue sort = orders.get(0); // Always use first column
+            boolean allowDuplicates = useFirstColumn;

Review Comment:
   Just `allowDuplicates` => `strict`



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java:
##########
@@ -281,13 +281,36 @@ private BitSliceIndexBitmap getBitSliceIndexBitmap() {
 
     public RoaringBitmap32 topK(
             int k, SortValue.NullOrdering nullOrdering, @Nullable 
RoaringBitmap32 foundSet) {
-        return fillNulls(k, nullOrdering, foundSet, (l, r) -> 
getBitSliceIndexBitmap().topK(l, r));
+        return topK(k, nullOrdering, foundSet, false);

Review Comment:
   This method is no longer necessary.



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmap.java:
##########
@@ -281,13 +281,36 @@ private BitSliceIndexBitmap getBitSliceIndexBitmap() {
 
     public RoaringBitmap32 topK(
             int k, SortValue.NullOrdering nullOrdering, @Nullable 
RoaringBitmap32 foundSet) {
-        return fillNulls(k, nullOrdering, foundSet, (l, r) -> 
getBitSliceIndexBitmap().topK(l, r));
+        return topK(k, nullOrdering, foundSet, false);
+    }
+
+    public RoaringBitmap32 topK(
+            int k,
+            SortValue.NullOrdering nullOrdering,
+            @Nullable RoaringBitmap32 foundSet,
+            boolean allowDuplicates) {
+        return fillNulls(
+                k,
+                nullOrdering,
+                foundSet,
+                (l, r) -> getBitSliceIndexBitmap().topK(l, r, 
allowDuplicates));
     }
 
     public RoaringBitmap32 bottomK(
             int k, SortValue.NullOrdering nullOrdering, @Nullable 
RoaringBitmap32 foundSet) {
+        return bottomK(k, nullOrdering, foundSet, false);

Review Comment:
   Same as topk.



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rangebitmap/BitSliceIndexBitmap.java:
##########
@@ -178,24 +183,34 @@ public RoaringBitmap32 topK(int k, @Nullable 
RoaringBitmap32 foundSet) {
                 e = RoaringBitmap32.andNot(e, getSlice(i));
             } else {
                 e = RoaringBitmap32.and(e, getSlice(i));
-                break;
+                if (!allowDuplicates) {
+                    break;
+                }
+                // If allowDuplicates, continue to include all rows with the 
same value
             }
         }
 
-        // only k results should be returned
+        // Return all results if allowDuplicates, otherwise limit to k
         RoaringBitmap32 f = RoaringBitmap32.or(g, e);
-        long n = f.getCardinality() - k;
-        if (n > 0) {
-            Iterator<Integer> iterator = e.iterator();
-            while (iterator.hasNext() && n > 0) {
-                f.remove(iterator.next());
-                n--;
+        if (!allowDuplicates) {
+            long n = f.getCardinality() - k;
+            if (n > 0) {
+                Iterator<Integer> iterator = e.iterator();
+                while (iterator.hasNext() && n > 0) {
+                    f.remove(iterator.next());
+                    n--;
+                }
             }
         }
         return f;
     }
 
     public RoaringBitmap32 bottomK(int k, @Nullable RoaringBitmap32 foundSet) {
+        return bottomK(k, foundSet, false);

Review Comment:
   Same as topk.



##########
paimon-common/src/test/java/org/apache/paimon/fileindex/rangebitmap/RangeBitmapFileIndexTest.java:
##########
@@ -34,13 +36,7 @@
 import org.junit.jupiter.api.RepeatedTest;
 import org.junit.jupiter.api.Test;
 
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Objects;
-import java.util.Random;
-import java.util.Set;
+import java.util.*;

Review Comment:
   Keep import as needed instead of import *.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to