This is an automated email from the ASF dual-hosted git repository.

Jackie-Jiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 9474069ebeb Revert "Fix UnsupportedOperationException in 
SVScanDocIdIterator for RAW forward index + separate dictionary (#18579)" 
(#18590)
9474069ebeb is described below

commit 9474069ebeb0c925b93cf585a8c95da00f7cbbdb
Author: Chaitanya Deepthi <[email protected]>
AuthorDate: Wed May 27 16:44:50 2026 -0700

    Revert "Fix UnsupportedOperationException in SVScanDocIdIterator for RAW 
forward index + separate dictionary (#18579)" (#18590)
    
    This reverts commit acf9d4ff5d32491d0a2d19055424e270999ae14a.
---
 .../dociditerators/SVScanDocIdIterator.java        | 115 ++----------------
 .../dociditerators/SVScanDocIdIteratorTest.java    | 130 ---------------------
 2 files changed, 8 insertions(+), 237 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
index 6ea08e74e64..da718f279c9 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
@@ -18,19 +18,14 @@
  */
 package org.apache.pinot.core.operator.dociditerators;
 
-import java.math.BigDecimal;
 import java.util.Map;
 import java.util.OptionalInt;
-import javax.annotation.Nullable;
 import org.apache.pinot.core.common.BlockDocIdIterator;
-import 
org.apache.pinot.core.operator.filter.predicate.BaseDictionaryBasedPredicateEvaluator;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import org.apache.pinot.segment.spi.Constants;
 import org.apache.pinot.segment.spi.datasource.DataSource;
-import org.apache.pinot.segment.spi.index.reader.Dictionary;
 import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
 import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
-import org.apache.pinot.spi.utils.ByteArray;
 import org.roaringbitmap.BatchIterator;
 import org.roaringbitmap.RoaringBitmapWriter;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
@@ -45,11 +40,6 @@ public final class SVScanDocIdIterator implements 
ScanBasedDocIdIterator {
   private final PredicateEvaluator _predicateEvaluator;
   private final ForwardIndexReader _reader;
   private final ForwardIndexReaderContext _readerContext;
-  // Dictionary may exist even when the forward index is RAW (built only for 
secondary indexes such as INVERTED /
-  // FST / IFST / RANGE). When set together with a dictionary-based predicate 
evaluator, scans translate raw values
-  // to dict ids via this dictionary instead of calling applySV(<rawType>) on 
the evaluator (which would throw).
-  @Nullable
-  private final Dictionary _dictionary;
   private final int _numDocs;
   private final ValueMatcher _valueMatcher;
   private final int[] _batch;
@@ -66,7 +56,6 @@ public final class SVScanDocIdIterator implements 
ScanBasedDocIdIterator {
     _predicateEvaluator = predicateEvaluator;
     _reader = dataSource.getForwardIndex();
     _readerContext = _reader.createContext(queryOptions);
-    _dictionary = dataSource.getDictionary();
     _numDocs = numDocs;
     _valueMatcher = getValueMatcher();
     _cardinality = dataSource.getDataSourceMetadata().getCardinality();
@@ -74,17 +63,10 @@ public final class SVScanDocIdIterator implements 
ScanBasedDocIdIterator {
 
   // for testing
   public SVScanDocIdIterator(PredicateEvaluator predicateEvaluator, 
ForwardIndexReader reader, int numDocs) {
-    this(predicateEvaluator, reader, numDocs, null);
-  }
-
-  // for testing
-  public SVScanDocIdIterator(PredicateEvaluator predicateEvaluator, 
ForwardIndexReader reader, int numDocs,
-      @Nullable Dictionary dictionary) {
     _batch = new int[BlockDocIdIterator.OPTIMAL_ITERATOR_BATCH_SIZE];
     _predicateEvaluator = predicateEvaluator;
     _reader = reader;
     _readerContext = reader.createContext();
-    _dictionary = dictionary;
     _numDocs = numDocs;
     _valueMatcher = getValueMatcher();
     _cardinality = -1;
@@ -184,48 +166,26 @@ public final class SVScanDocIdIterator implements 
ScanBasedDocIdIterator {
   private ValueMatcher getValueMatcher() {
     if (_reader.isDictionaryEncoded()) {
       return new DictIdMatcher();
-    }
-    // RAW forward index, but a separate dictionary exists and the predicate 
evaluator is dict-based. Translate raw
-    // values to dict ids on read so applySV(int dictId) can be used. This is 
needed because dict-based evaluators
-    // (e.g. DictIdBasedRegexpLikePredicateEvaluator, 
IFSTBasedRegexpPredicateEvaluator) only implement applySV(int).
-    if (_dictionary != null && _predicateEvaluator instanceof 
BaseDictionaryBasedPredicateEvaluator) {
+    } else {
       switch (_reader.getStoredType()) {
         case INT:
-          return new IntDictLookupMatcher();
+          return new IntMatcher();
         case LONG:
-          return new LongDictLookupMatcher();
+          return new LongMatcher();
         case FLOAT:
-          return new FloatDictLookupMatcher();
+          return new FloatMatcher();
         case DOUBLE:
-          return new DoubleDictLookupMatcher();
+          return new DoubleMatcher();
         case BIG_DECIMAL:
-          return new BigDecimalDictLookupMatcher();
+          return new BigDecimalMatcher();
         case STRING:
-          return new StringDictLookupMatcher();
+          return new StringMatcher();
         case BYTES:
-          return new BytesDictLookupMatcher();
+          return new BytesMatcher();
         default:
           throw new UnsupportedOperationException();
       }
     }
-    switch (_reader.getStoredType()) {
-      case INT:
-        return new IntMatcher();
-      case LONG:
-        return new LongMatcher();
-      case FLOAT:
-        return new FloatMatcher();
-      case DOUBLE:
-        return new DoubleMatcher();
-      case BIG_DECIMAL:
-        return new BigDecimalMatcher();
-      case STRING:
-        return new StringMatcher();
-      case BYTES:
-        return new BytesMatcher();
-      default:
-        throw new UnsupportedOperationException();
-    }
   }
 
   private interface ValueMatcher {
@@ -357,65 +317,6 @@ public final class SVScanDocIdIterator implements 
ScanBasedDocIdIterator {
     }
   }
 
-  // ----- Dict-lookup matchers: forward index is RAW but a separate 
dictionary exists. -----
-
-  private class IntDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      int dictId = _dictionary.indexOf(_reader.getInt(docId, _readerContext));
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
-  private class LongDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      int dictId = _dictionary.indexOf(_reader.getLong(docId, _readerContext));
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
-  private class FloatDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      int dictId = _dictionary.indexOf(_reader.getFloat(docId, 
_readerContext));
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
-  private class DoubleDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      int dictId = _dictionary.indexOf(_reader.getDouble(docId, 
_readerContext));
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
-  private class BigDecimalDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      BigDecimal value = _reader.getBigDecimal(docId, _readerContext);
-      int dictId = _dictionary.indexOf(value);
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
-  private class StringDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      int dictId = _dictionary.indexOf(_reader.getString(docId, 
_readerContext));
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
-  private class BytesDictLookupMatcher implements ValueMatcher {
-    @Override
-    public boolean doesValueMatch(int docId) {
-      int dictId = _dictionary.indexOf(new ByteArray(_reader.getBytes(docId, 
_readerContext)));
-      return dictId >= 0 && _predicateEvaluator.applySV(dictId);
-    }
-  }
-
   @Override
   public void close() {
     if (_readerContext != null) {
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIteratorTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIteratorTest.java
deleted file mode 100644
index a2e499ee3da..00000000000
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIteratorTest.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.core.operator.dociditerators;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import org.apache.pinot.common.request.context.predicate.Predicate;
-import 
org.apache.pinot.core.operator.filter.predicate.BaseDictionaryBasedPredicateEvaluator;
-import org.apache.pinot.segment.spi.Constants;
-import org.apache.pinot.segment.spi.index.reader.Dictionary;
-import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
-import org.apache.pinot.spi.data.FieldSpec.DataType;
-import org.mockito.Mockito;
-import org.testng.annotations.Test;
-
-import static org.testng.Assert.assertEquals;
-
-
-public class SVScanDocIdIteratorTest {
-
-  /// RAW forward index + separate dictionary + dictionary-based predicate 
evaluator
-  /// (the layout used by external/iceberg-backed tables with 
IFST/FST/inverted-needing-dict).
-  /// Without the DictLookupMatcher path, the scan would select 
`StringMatcher` and call
-  /// `applySV(String)` on the dict-based evaluator, throwing 
`UnsupportedOperationException`.
-  @Test
-  public void rawForwardWithDictAndDictBasedEvaluatorRoutesThroughDictLookup() 
{
-    String[] values = {"apple", "banana", "cherry", "apple", "date", "banana"};
-    Map<String, Integer> dictEntries = new HashMap<>();
-    dictEntries.put("apple", 0);
-    dictEntries.put("banana", 1);
-    dictEntries.put("cherry", 2);
-    dictEntries.put("date", 3);
-
-    ForwardIndexReader reader = mockRawStringReader(values);
-    Dictionary dictionary = mockStringDictionary(dictEntries);
-    // Matches dict ids {0, 2} → values "apple" and "cherry".
-    BaseDictionaryBasedPredicateEvaluator evaluator = new 
TestDictPredicateEvaluator(dictionary, 0, 2);
-
-    SVScanDocIdIterator iterator = new SVScanDocIdIterator(evaluator, reader, 
values.length, dictionary);
-
-    assertEquals(iterator.next(), 0);   // "apple"
-    assertEquals(iterator.next(), 2);   // "cherry"
-    assertEquals(iterator.next(), 3);   // "apple"
-    assertEquals(iterator.next(), Constants.EOF);
-  }
-
-  /// A raw value absent from the dictionary (`dict.indexOf(value)` returns 
-1) must be treated as a no-match
-  /// without invoking `applySV` on the evaluator.
-  @Test
-  public void unknownValueDoesNotMatch() {
-    String[] values = {"apple", "ghost", "cherry"};
-    Map<String, Integer> dictEntries = new HashMap<>();
-    dictEntries.put("apple", 0);
-    dictEntries.put("cherry", 2);
-    // "ghost" is not present → dict.indexOf("ghost") == -1.
-
-    ForwardIndexReader reader = mockRawStringReader(values);
-    Dictionary dictionary = mockStringDictionary(dictEntries);
-    // Matches every known dict id; the only way "ghost" can match is if -1 
leaks into applySV.
-    BaseDictionaryBasedPredicateEvaluator evaluator = new 
TestDictPredicateEvaluator(dictionary, 0, 2);
-
-    SVScanDocIdIterator iterator = new SVScanDocIdIterator(evaluator, reader, 
values.length, dictionary);
-
-    assertEquals(iterator.next(), 0);
-    assertEquals(iterator.next(), 2);
-    assertEquals(iterator.next(), Constants.EOF);
-  }
-
-  @SuppressWarnings({"rawtypes", "unchecked"})
-  private static ForwardIndexReader mockRawStringReader(String[] values) {
-    ForwardIndexReader reader = Mockito.mock(ForwardIndexReader.class);
-    Mockito.when(reader.isDictionaryEncoded()).thenReturn(false);
-    Mockito.when(reader.getStoredType()).thenReturn(DataType.STRING);
-    Mockito.when(reader.createContext()).thenReturn(null);
-    Mockito.when(reader.getString(Mockito.anyInt(), 
Mockito.any())).thenAnswer(invocation -> {
-      int docId = invocation.getArgument(0);
-      return values[docId];
-    });
-    return reader;
-  }
-
-  private static Dictionary mockStringDictionary(Map<String, Integer> entries) 
{
-    Dictionary dictionary = Mockito.mock(Dictionary.class);
-    Mockito.when(dictionary.length()).thenReturn(entries.size());
-    
Mockito.when(dictionary.indexOf(Mockito.anyString())).thenAnswer(invocation -> {
-      String value = invocation.getArgument(0);
-      Integer id = entries.get(value);
-      return id != null ? id : -1;
-    });
-    return dictionary;
-  }
-
-  /// Minimal stand-in for IFST/FST/DictId-based regex evaluators: matches a 
fixed set of dict ids via
-  /// `applySV(int dictId)`. Inherits the default `applySV(String)` that 
throws `UnsupportedOperationException`,
-  /// so the test fails loudly if the scan picks the wrong matcher.
-  private static class TestDictPredicateEvaluator extends 
BaseDictionaryBasedPredicateEvaluator {
-    private final Set<Integer> _matchingDictIds;
-
-    TestDictPredicateEvaluator(Dictionary dictionary, int... matchingDictIds) {
-      super(Mockito.mock(Predicate.class), dictionary);
-      _matchingDictIds = new HashSet<>();
-      for (int id : matchingDictIds) {
-        _matchingDictIds.add(id);
-      }
-    }
-
-    @Override
-    public boolean applySV(int dictId) {
-      return _matchingDictIds.contains(dictId);
-    }
-  }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to