This is an automated email from the ASF dual-hosted git repository.
xiangfu0 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 09309dd0769 [Bug fix] Skip flushing empty chunk in
OffHeapH3IndexCreator (#18834)
09309dd0769 is described below
commit 09309dd0769007b0c3c5170cdf2e6c998b2ddb3d
Author: Jhow <[email protected]>
AuthorDate: Tue Jun 23 00:27:07 2026 -0700
[Bug fix] Skip flushing empty chunk in OffHeapH3IndexCreator (#18834)
* skip flushing when no entry is in posting list
* move the guard to the caller
* make the flush condition clean
---
.../impl/inv/geospatial/OffHeapH3IndexCreator.java | 9 ++++--
.../segment/local/segment/index/H3IndexTest.java | 35 ++++++++++++++++++++++
2 files changed, 41 insertions(+), 3 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
index 72fb3ddf066..dc1ab450a16 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
@@ -73,7 +73,7 @@ public class OffHeapH3IndexCreator extends BaseH3IndexCreator
{
public void add(@Nullable Geometry geometry)
throws IOException {
super.add(geometry);
- if (_postingListMap.size() % FLUSH_THRESHOLD == 0) {
+ if (_postingListMap.size() == FLUSH_THRESHOLD) {
flush();
}
}
@@ -113,7 +113,7 @@ public class OffHeapH3IndexCreator extends
BaseH3IndexCreator {
}
// Flush the last chunk
- if (_postingListMap.size() % FLUSH_THRESHOLD != 0) {
+ if (!_postingListMap.isEmpty()) {
flush();
}
_postingListOutputStream.close();
@@ -135,7 +135,10 @@ public class OffHeapH3IndexCreator extends
BaseH3IndexCreator {
// Merge posting lists from the chunk iterators
PriorityQueue<PostingListEntry> priorityQueue = new
PriorityQueue<>(numChunks);
for (ChunkIterator chunkIterator : chunkIterators) {
- priorityQueue.offer(chunkIterator.next());
+ // Defensive: skip empty chunks instead of blindly reading the first
entry (flush() should never produce one)
+ if (chunkIterator.hasNext()) {
+ priorityQueue.offer(chunkIterator.next());
+ }
}
long currentH3Id = Long.MIN_VALUE;
MutableRoaringBitmap currentDocIds = null;
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
index 2ea6ae55e64..4db35b2d738 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/H3IndexTest.java
@@ -262,6 +262,41 @@ public class H3IndexTest implements
PinotBuffersAfterMethodCheckRule {
}
}
+ @Test
+ public void testOffHeapSkipAtEmptyBufferDoesNotCreateEmptyChunk()
+ throws Exception {
+ // Regression test for an IndexOutOfBoundsException in
OffHeapH3IndexCreator.seal(). When a record is skipped
+ // (e.g. a null/invalid geometry tolerated via continueOnError) while the
in-memory posting list buffer is empty
+ // -- which is the case for the very first record and immediately after
each flush -- the flush trigger (buffer
+ // size being a multiple of FLUSH_THRESHOLD, which includes 0) used to
append a zero-length chunk. That empty
+ // chunk produced an empty ChunkIterator that was read unconditionally
during the merge in seal(), throwing.
+ String columnName = "offHeapSkipAtEmptyBuffer";
+ int res = 5;
+ H3IndexResolution resolution = new
H3IndexResolution(Collections.singletonList(res));
+
+ try (GeoSpatialIndexCreator creator = new OffHeapH3IndexCreator(TEMP_DIR,
columnName, "myTable_OFFLINE", true,
+ resolution)) {
+ // Skip the very first record while the buffer is empty. This used to
create a zero-length leading chunk, which
+ // also pushed seal() into the multi-chunk merge path where the empty
chunk was read and threw.
+ creator.add(null);
+ creator.add(GeometryUtils.GEOMETRY_FACTORY.createPoint(new
Coordinate(10, 20)));
+ creator.add(GeometryUtils.GEOMETRY_FACTORY.createPoint(new
Coordinate(30, 40)));
+
+ // seal() must not throw and must produce a valid, readable index.
+ creator.seal();
+ }
+
+ File indexFile = new File(TEMP_DIR, columnName +
V1Constants.Indexes.H3_INDEX_FILE_EXTENSION);
+ try (PinotDataBuffer buffer =
PinotDataBuffer.mapReadOnlyBigEndianFile(indexFile);
+ H3IndexReader reader = new ImmutableH3IndexReader(buffer)) {
+ // docId 0 was the skipped record; the two valid points are docIds 1 and
2.
+ Assert.assertEquals(reader.getDocIds(H3Utils.H3_CORE.latLngToCell(20,
10, res)),
+ ImmutableRoaringBitmap.bitmapOf(1));
+ Assert.assertEquals(reader.getDocIds(H3Utils.H3_CORE.latLngToCell(40,
30, res)),
+ ImmutableRoaringBitmap.bitmapOf(2));
+ }
+ }
+
@Test
public void testSkipInvalidGeometryContinueOnErrorFalse()
throws Exception {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]