This is an automated email from the ASF dual-hosted git repository.

xiangfu0 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 09309dd0769 [Bug fix] Skip flushing empty chunk in 
OffHeapH3IndexCreator (#18834)
09309dd0769 is described below

commit 09309dd0769007b0c3c5170cdf2e6c998b2ddb3d
Author: Jhow <[email protected]>
AuthorDate: Tue Jun 23 00:27:07 2026 -0700

    [Bug fix] Skip flushing empty chunk in OffHeapH3IndexCreator (#18834)
    
    * skip flushing when no entry is in posting list
    
    * move the guard to the caller
    
    * make the flush condition clean
---
 .../impl/inv/geospatial/OffHeapH3IndexCreator.java |  9 ++++--
 .../segment/local/segment/index/H3IndexTest.java   | 35 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
index 72fb3ddf066..dc1ab450a16 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
@@ -73,7 +73,7 @@ public class OffHeapH3IndexCreator extends BaseH3IndexCreator 
{
   public void add(@Nullable Geometry geometry)
       throws IOException {
     super.add(geometry);
-    if (_postingListMap.size() % FLUSH_THRESHOLD == 0) {
+    if (_postingListMap.size() == FLUSH_THRESHOLD) {
       flush();
     }
   }
@@ -113,7 +113,7 @@ public class OffHeapH3IndexCreator extends 
BaseH3IndexCreator {
     }
 
     // Flush the last chunk
-    if (_postingListMap.size() % FLUSH_THRESHOLD != 0) {
+    if (!_postingListMap.isEmpty()) {
       flush();
     }
     _postingListOutputStream.close();
@@ -135,7 +135,10 @@ public class OffHeapH3IndexCreator extends 
BaseH3IndexCreator {
       // Merge posting lists from the chunk iterators
       PriorityQueue<PostingListEntry> priorityQueue = new 
PriorityQueue<>(numChunks);
       for (ChunkIterator chunkIterator : chunkIterators) {
-        priorityQueue.offer(chunkIterator.next());
+        // Defensive: skip empty chunks instead of blindly reading the first 
entry (flush() should never produce one)
+        if (chunkIterator.hasNext()) {
+          priorityQueue.offer(chunkIterator.next());
+        }
       }
       long currentH3Id = Long.MIN_VALUE;
       MutableRoaringBitmap currentDocIds = null;
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
index 2ea6ae55e64..4db35b2d738 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
@@ -262,6 +262,41 @@ public class H3IndexTest implements 
PinotBuffersAfterMethodCheckRule {
     }
   }
 
+  @Test
+  public void testOffHeapSkipAtEmptyBufferDoesNotCreateEmptyChunk()
+      throws Exception {
+    // Regression test for an IndexOutOfBoundsException in 
OffHeapH3IndexCreator.seal(). When a record is skipped
+    // (e.g. a null/invalid geometry tolerated via continueOnError) while the 
in-memory posting list buffer is empty
+    // -- which is the case for the very first record and immediately after 
each flush -- the flush trigger (buffer
+    // size being a multiple of FLUSH_THRESHOLD, which includes 0) used to 
append a zero-length chunk. That empty
+    // chunk produced an empty ChunkIterator that was read unconditionally 
during the merge in seal(), throwing.
+    String columnName = "offHeapSkipAtEmptyBuffer";
+    int res = 5;
+    H3IndexResolution resolution = new 
H3IndexResolution(Collections.singletonList(res));
+
+    try (GeoSpatialIndexCreator creator = new OffHeapH3IndexCreator(TEMP_DIR, 
columnName, "myTable_OFFLINE", true,
+        resolution)) {
+      // Skip the very first record while the buffer is empty. This used to 
create a zero-length leading chunk, which
+      // also pushed seal() into the multi-chunk merge path where the empty 
chunk was read and threw.
+      creator.add(null);
+      creator.add(GeometryUtils.GEOMETRY_FACTORY.createPoint(new 
Coordinate(10, 20)));
+      creator.add(GeometryUtils.GEOMETRY_FACTORY.createPoint(new 
Coordinate(30, 40)));
+
+      // seal() must not throw and must produce a valid, readable index.
+      creator.seal();
+    }
+
+    File indexFile = new File(TEMP_DIR, columnName + 
V1Constants.Indexes.H3_INDEX_FILE_EXTENSION);
+    try (PinotDataBuffer buffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(indexFile);
+        H3IndexReader reader = new ImmutableH3IndexReader(buffer)) {
+      // docId 0 was the skipped record; the two valid points are docIds 1 and 
2.
+      Assert.assertEquals(reader.getDocIds(H3Utils.H3_CORE.latLngToCell(20, 
10, res)),
+          ImmutableRoaringBitmap.bitmapOf(1));
+      Assert.assertEquals(reader.getDocIds(H3Utils.H3_CORE.latLngToCell(40, 
30, res)),
+          ImmutableRoaringBitmap.bitmapOf(2));
+    }
+  }
+
   @Test
   public void testSkipInvalidGeometryContinueOnErrorFalse()
       throws Exception {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to