This is an automated email from the ASF dual-hosted git repository.
Jackie-Jiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 9474069ebeb Revert "Fix UnsupportedOperationException in
SVScanDocIdIterator for RAW forward index + separate dictionary (#18579)"
(#18590)
9474069ebeb is described below
commit 9474069ebeb0c925b93cf585a8c95da00f7cbbdb
Author: Chaitanya Deepthi <[email protected]>
AuthorDate: Wed May 27 16:44:50 2026 -0700
Revert "Fix UnsupportedOperationException in SVScanDocIdIterator for RAW
forward index + separate dictionary (#18579)" (#18590)
This reverts commit acf9d4ff5d32491d0a2d19055424e270999ae14a.
---
.../dociditerators/SVScanDocIdIterator.java | 115 ++----------------
.../dociditerators/SVScanDocIdIteratorTest.java | 130 ---------------------
2 files changed, 8 insertions(+), 237 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
index 6ea08e74e64..da718f279c9 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
@@ -18,19 +18,14 @@
*/
package org.apache.pinot.core.operator.dociditerators;
-import java.math.BigDecimal;
import java.util.Map;
import java.util.OptionalInt;
-import javax.annotation.Nullable;
import org.apache.pinot.core.common.BlockDocIdIterator;
-import
org.apache.pinot.core.operator.filter.predicate.BaseDictionaryBasedPredicateEvaluator;
import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
import org.apache.pinot.segment.spi.Constants;
import org.apache.pinot.segment.spi.datasource.DataSource;
-import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
-import org.apache.pinot.spi.utils.ByteArray;
import org.roaringbitmap.BatchIterator;
import org.roaringbitmap.RoaringBitmapWriter;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
@@ -45,11 +40,6 @@ public final class SVScanDocIdIterator implements
ScanBasedDocIdIterator {
private final PredicateEvaluator _predicateEvaluator;
private final ForwardIndexReader _reader;
private final ForwardIndexReaderContext _readerContext;
- // Dictionary may exist even when the forward index is RAW (built only for
secondary indexes such as INVERTED /
- // FST / IFST / RANGE). When set together with a dictionary-based predicate
evaluator, scans translate raw values
- // to dict ids via this dictionary instead of calling applySV(<rawType>) on
the evaluator (which would throw).
- @Nullable
- private final Dictionary _dictionary;
private final int _numDocs;
private final ValueMatcher _valueMatcher;
private final int[] _batch;
@@ -66,7 +56,6 @@ public final class SVScanDocIdIterator implements
ScanBasedDocIdIterator {
_predicateEvaluator = predicateEvaluator;
_reader = dataSource.getForwardIndex();
_readerContext = _reader.createContext(queryOptions);
- _dictionary = dataSource.getDictionary();
_numDocs = numDocs;
_valueMatcher = getValueMatcher();
_cardinality = dataSource.getDataSourceMetadata().getCardinality();
@@ -74,17 +63,10 @@ public final class SVScanDocIdIterator implements
ScanBasedDocIdIterator {
// for testing
public SVScanDocIdIterator(PredicateEvaluator predicateEvaluator,
ForwardIndexReader reader, int numDocs) {
- this(predicateEvaluator, reader, numDocs, null);
- }
-
- // for testing
- public SVScanDocIdIterator(PredicateEvaluator predicateEvaluator,
ForwardIndexReader reader, int numDocs,
- @Nullable Dictionary dictionary) {
_batch = new int[BlockDocIdIterator.OPTIMAL_ITERATOR_BATCH_SIZE];
_predicateEvaluator = predicateEvaluator;
_reader = reader;
_readerContext = reader.createContext();
- _dictionary = dictionary;
_numDocs = numDocs;
_valueMatcher = getValueMatcher();
_cardinality = -1;
@@ -184,48 +166,26 @@ public final class SVScanDocIdIterator implements
ScanBasedDocIdIterator {
private ValueMatcher getValueMatcher() {
if (_reader.isDictionaryEncoded()) {
return new DictIdMatcher();
- }
- // RAW forward index, but a separate dictionary exists and the predicate
evaluator is dict-based. Translate raw
- // values to dict ids on read so applySV(int dictId) can be used. This is
needed because dict-based evaluators
- // (e.g. DictIdBasedRegexpLikePredicateEvaluator,
IFSTBasedRegexpPredicateEvaluator) only implement applySV(int).
- if (_dictionary != null && _predicateEvaluator instanceof
BaseDictionaryBasedPredicateEvaluator) {
+ } else {
switch (_reader.getStoredType()) {
case INT:
- return new IntDictLookupMatcher();
+ return new IntMatcher();
case LONG:
- return new LongDictLookupMatcher();
+ return new LongMatcher();
case FLOAT:
- return new FloatDictLookupMatcher();
+ return new FloatMatcher();
case DOUBLE:
- return new DoubleDictLookupMatcher();
+ return new DoubleMatcher();
case BIG_DECIMAL:
- return new BigDecimalDictLookupMatcher();
+ return new BigDecimalMatcher();
case STRING:
- return new StringDictLookupMatcher();
+ return new StringMatcher();
case BYTES:
- return new BytesDictLookupMatcher();
+ return new BytesMatcher();
default:
throw new UnsupportedOperationException();
}
}
- switch (_reader.getStoredType()) {
- case INT:
- return new IntMatcher();
- case LONG:
- return new LongMatcher();
- case FLOAT:
- return new FloatMatcher();
- case DOUBLE:
- return new DoubleMatcher();
- case BIG_DECIMAL:
- return new BigDecimalMatcher();
- case STRING:
- return new StringMatcher();
- case BYTES:
- return new BytesMatcher();
- default:
- throw new UnsupportedOperationException();
- }
}
private interface ValueMatcher {
@@ -357,65 +317,6 @@ public final class SVScanDocIdIterator implements
ScanBasedDocIdIterator {
}
}
- // ----- Dict-lookup matchers: forward index is RAW but a separate
dictionary exists. -----
-
- private class IntDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- int dictId = _dictionary.indexOf(_reader.getInt(docId, _readerContext));
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
- private class LongDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- int dictId = _dictionary.indexOf(_reader.getLong(docId, _readerContext));
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
- private class FloatDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- int dictId = _dictionary.indexOf(_reader.getFloat(docId,
_readerContext));
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
- private class DoubleDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- int dictId = _dictionary.indexOf(_reader.getDouble(docId,
_readerContext));
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
- private class BigDecimalDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- BigDecimal value = _reader.getBigDecimal(docId, _readerContext);
- int dictId = _dictionary.indexOf(value);
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
- private class StringDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- int dictId = _dictionary.indexOf(_reader.getString(docId,
_readerContext));
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
- private class BytesDictLookupMatcher implements ValueMatcher {
- @Override
- public boolean doesValueMatch(int docId) {
- int dictId = _dictionary.indexOf(new ByteArray(_reader.getBytes(docId,
_readerContext)));
- return dictId >= 0 && _predicateEvaluator.applySV(dictId);
- }
- }
-
@Override
public void close() {
if (_readerContext != null) {
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIteratorTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIteratorTest.java
deleted file mode 100644
index a2e499ee3da..00000000000
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIteratorTest.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.core.operator.dociditerators;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import org.apache.pinot.common.request.context.predicate.Predicate;
-import
org.apache.pinot.core.operator.filter.predicate.BaseDictionaryBasedPredicateEvaluator;
-import org.apache.pinot.segment.spi.Constants;
-import org.apache.pinot.segment.spi.index.reader.Dictionary;
-import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
-import org.apache.pinot.spi.data.FieldSpec.DataType;
-import org.mockito.Mockito;
-import org.testng.annotations.Test;
-
-import static org.testng.Assert.assertEquals;
-
-
-public class SVScanDocIdIteratorTest {
-
- /// RAW forward index + separate dictionary + dictionary-based predicate
evaluator
- /// (the layout used by external/iceberg-backed tables with
IFST/FST/inverted-needing-dict).
- /// Without the DictLookupMatcher path, the scan would select
`StringMatcher` and call
- /// `applySV(String)` on the dict-based evaluator, throwing
`UnsupportedOperationException`.
- @Test
- public void rawForwardWithDictAndDictBasedEvaluatorRoutesThroughDictLookup()
{
- String[] values = {"apple", "banana", "cherry", "apple", "date", "banana"};
- Map<String, Integer> dictEntries = new HashMap<>();
- dictEntries.put("apple", 0);
- dictEntries.put("banana", 1);
- dictEntries.put("cherry", 2);
- dictEntries.put("date", 3);
-
- ForwardIndexReader reader = mockRawStringReader(values);
- Dictionary dictionary = mockStringDictionary(dictEntries);
- // Matches dict ids {0, 2} → values "apple" and "cherry".
- BaseDictionaryBasedPredicateEvaluator evaluator = new
TestDictPredicateEvaluator(dictionary, 0, 2);
-
- SVScanDocIdIterator iterator = new SVScanDocIdIterator(evaluator, reader,
values.length, dictionary);
-
- assertEquals(iterator.next(), 0); // "apple"
- assertEquals(iterator.next(), 2); // "cherry"
- assertEquals(iterator.next(), 3); // "apple"
- assertEquals(iterator.next(), Constants.EOF);
- }
-
- /// A raw value absent from the dictionary (`dict.indexOf(value)` returns
-1) must be treated as a no-match
- /// without invoking `applySV` on the evaluator.
- @Test
- public void unknownValueDoesNotMatch() {
- String[] values = {"apple", "ghost", "cherry"};
- Map<String, Integer> dictEntries = new HashMap<>();
- dictEntries.put("apple", 0);
- dictEntries.put("cherry", 2);
- // "ghost" is not present → dict.indexOf("ghost") == -1.
-
- ForwardIndexReader reader = mockRawStringReader(values);
- Dictionary dictionary = mockStringDictionary(dictEntries);
- // Matches every known dict id; the only way "ghost" can match is if -1
leaks into applySV.
- BaseDictionaryBasedPredicateEvaluator evaluator = new
TestDictPredicateEvaluator(dictionary, 0, 2);
-
- SVScanDocIdIterator iterator = new SVScanDocIdIterator(evaluator, reader,
values.length, dictionary);
-
- assertEquals(iterator.next(), 0);
- assertEquals(iterator.next(), 2);
- assertEquals(iterator.next(), Constants.EOF);
- }
-
- @SuppressWarnings({"rawtypes", "unchecked"})
- private static ForwardIndexReader mockRawStringReader(String[] values) {
- ForwardIndexReader reader = Mockito.mock(ForwardIndexReader.class);
- Mockito.when(reader.isDictionaryEncoded()).thenReturn(false);
- Mockito.when(reader.getStoredType()).thenReturn(DataType.STRING);
- Mockito.when(reader.createContext()).thenReturn(null);
- Mockito.when(reader.getString(Mockito.anyInt(),
Mockito.any())).thenAnswer(invocation -> {
- int docId = invocation.getArgument(0);
- return values[docId];
- });
- return reader;
- }
-
- private static Dictionary mockStringDictionary(Map<String, Integer> entries)
{
- Dictionary dictionary = Mockito.mock(Dictionary.class);
- Mockito.when(dictionary.length()).thenReturn(entries.size());
-
Mockito.when(dictionary.indexOf(Mockito.anyString())).thenAnswer(invocation -> {
- String value = invocation.getArgument(0);
- Integer id = entries.get(value);
- return id != null ? id : -1;
- });
- return dictionary;
- }
-
- /// Minimal stand-in for IFST/FST/DictId-based regex evaluators: matches a
fixed set of dict ids via
- /// `applySV(int dictId)`. Inherits the default `applySV(String)` that
throws `UnsupportedOperationException`,
- /// so the test fails loudly if the scan picks the wrong matcher.
- private static class TestDictPredicateEvaluator extends
BaseDictionaryBasedPredicateEvaluator {
- private final Set<Integer> _matchingDictIds;
-
- TestDictPredicateEvaluator(Dictionary dictionary, int... matchingDictIds) {
- super(Mockito.mock(Predicate.class), dictionary);
- _matchingDictIds = new HashSet<>();
- for (int id : matchingDictIds) {
- _matchingDictIds.add(id);
- }
- }
-
- @Override
- public boolean applySV(int dictId) {
- return _matchingDictIds.contains(dictId);
- }
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]