This is an automated email from the ASF dual-hosted git repository.

yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d319a5715a Avoid cloning the first bitmap when intersecting 
bitmap-based doc id sets in AndDocIdSet (#18912)
6d319a5715a is described below

commit 6d319a5715adbdf82251305de58347ebe2efa552
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Thu Jul 2 20:58:27 2026 +0200

    Avoid cloning the first bitmap when intersecting bitmap-based doc id sets 
in AndDocIdSet (#18912)
---
 .../pinot/core/operator/docidsets/AndDocIdSet.java |   9 +-
 .../core/operator/docidsets/AndDocIdSetTest.java   | 128 +++++++++++++++++++++
 2 files changed, 135 insertions(+), 2 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
index a2a7c3294d2..616b1a4542f 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
@@ -157,8 +157,13 @@ public final class AndDocIdSet implements BlockDocIdSet {
         if (numBitmapBasedDocIdIterators == 1) {
           docIds = bitmapBasedDocIdIterators.get(0).getDocIds();
         } else {
-          MutableRoaringBitmap mutableDocIds = 
bitmapBasedDocIdIterators.get(0).getDocIds().toMutableRoaringBitmap();
-          for (int i = 1; i < numBitmapBasedDocIdIterators; i++) {
+          // NOTE: Intersect the two lowest-cardinality bitmaps (guaranteed by 
the sort above) into a fresh result
+          //       with the static and(), which allocates only the 
intersection's containers, then intersect the
+          //       remaining bitmaps into it in place (the intersection is 
usually much smaller than any operand).
+          //       Inputs are never mutated.
+          MutableRoaringBitmap mutableDocIds = ImmutableRoaringBitmap.and(
+              bitmapBasedDocIdIterators.get(0).getDocIds(), 
bitmapBasedDocIdIterators.get(1).getDocIds());
+          for (int i = 2; i < numBitmapBasedDocIdIterators; i++) {
             mutableDocIds.and(bitmapBasedDocIdIterators.get(i).getDocIds());
           }
           docIds = mutableDocIds;
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/docidsets/AndDocIdSetTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/docidsets/AndDocIdSetTest.java
new file mode 100644
index 00000000000..1c6dc391fd9
--- /dev/null
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/docidsets/AndDocIdSetTest.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.docidsets;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.pinot.core.common.BlockDocIdIterator;
+import org.apache.pinot.core.common.BlockDocIdSet;
+import org.apache.pinot.segment.spi.Constants;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Unit test for {@link AndDocIdSet}, focusing on the bitmap-only intersection 
path.
+ */
+public class AndDocIdSetTest {
+  private static final long RANDOM_SEED = System.currentTimeMillis();
+  private static final Random RANDOM = new Random(RANDOM_SEED);
+  private static final String ERROR_MESSAGE = "Random seed: " + RANDOM_SEED;
+  private static final int NUM_DOCS = 100000;
+
+  @Test
+  public void testBitmapOnlyIntersection() {
+    // Cover both the 2-bitmap and the 3+-bitmap paths, mixing operand shapes: 
dense (bitmap containers),
+    // sparse (array containers), run-optimized, and a serialized 
buffer-backed ImmutableRoaringBitmap (the
+    // shape produced by memory-mapped index readers).
+    for (int numBitmaps = 2; numBitmaps <= 4; numBitmaps++) {
+      ImmutableRoaringBitmap[] bitmaps = new 
ImmutableRoaringBitmap[numBitmaps];
+      MutableRoaringBitmap expected = null;
+      for (int i = 0; i < numBitmaps; i++) {
+        MutableRoaringBitmap bitmap = randomBitmap(i == 1 ? 0.001 : 0.2 + 0.2 
* i);
+        switch (i % 4) {
+          case 1:
+            // Sparse bitmap left as-is (array containers)
+            bitmaps[i] = bitmap;
+            break;
+          case 2:
+            bitmap.runOptimize();
+            bitmaps[i] = bitmap;
+            break;
+          case 3:
+            bitmaps[i] = serialize(bitmap);
+            break;
+          default:
+            bitmaps[i] = bitmap;
+            break;
+        }
+        expected = expected == null ? bitmaps[i].toMutableRoaringBitmap()
+            : MutableRoaringBitmap.and(expected, 
bitmaps[i].toMutableRoaringBitmap());
+      }
+      ImmutableRoaringBitmap[] copies = new ImmutableRoaringBitmap[numBitmaps];
+      List<BlockDocIdSet> docIdSets = new ArrayList<>(numBitmaps);
+      for (int i = 0; i < numBitmaps; i++) {
+        copies[i] = bitmaps[i].toMutableRoaringBitmap();
+        docIdSets.add(new BitmapDocIdSet(bitmaps[i], NUM_DOCS));
+      }
+
+      int[] actualDocIds = collectDocIds(new AndDocIdSet(docIdSets, null));
+
+      assertEquals(actualDocIds, expected.toArray(), ERROR_MESSAGE);
+      // The intersection must not mutate the input bitmaps (they may be 
shared or backed by index buffers)
+      for (int i = 0; i < numBitmaps; i++) {
+        assertEquals(bitmaps[i].toMutableRoaringBitmap(), copies[i], 
ERROR_MESSAGE);
+      }
+    }
+  }
+
+  @Test
+  public void testBitmapOnlyIntersectionEmptyResult() {
+    MutableRoaringBitmap first = MutableRoaringBitmap.bitmapOf(1, 3, 5);
+    MutableRoaringBitmap second = MutableRoaringBitmap.bitmapOf(0, 2, 4);
+    List<BlockDocIdSet> docIdSets =
+        List.of(new BitmapDocIdSet(first, NUM_DOCS), new 
BitmapDocIdSet(second, NUM_DOCS));
+
+    int[] actualDocIds = collectDocIds(new AndDocIdSet(docIdSets, null));
+
+    assertEquals(actualDocIds.length, 0);
+  }
+
+  private static MutableRoaringBitmap randomBitmap(double density) {
+    MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+    for (int docId = 0; docId < NUM_DOCS; docId++) {
+      if (RANDOM.nextDouble() < density) {
+        bitmap.add(docId);
+      }
+    }
+    return bitmap;
+  }
+
+  private static ImmutableRoaringBitmap serialize(MutableRoaringBitmap bitmap) 
{
+    ByteBuffer buffer = ByteBuffer.allocate(bitmap.serializedSizeInBytes());
+    bitmap.serialize(buffer);
+    buffer.flip();
+    return new ImmutableRoaringBitmap(buffer);
+  }
+
+  private static int[] collectDocIds(BlockDocIdSet docIdSet) {
+    BlockDocIdIterator iterator = docIdSet.iterator();
+    List<Integer> docIds = new ArrayList<>();
+    int docId;
+    while ((docId = iterator.next()) != Constants.EOF) {
+      docIds.add(docId);
+    }
+    return docIds.stream().mapToInt(Integer::intValue).toArray();
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to