This is an automated email from the ASF dual-hosted git repository.

Jackie-Jiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 15a4da0267e Fall back to raw-value REGEXP_LIKE evaluator when no dict-consuming index is available (#18599)
15a4da0267e is described below

commit 15a4da0267e683d082d4bfb66ba89cf7d2c065e2
Author: Chaitanya Deepthi <[email protected]>
AuthorDate: Thu May 28 11:39:54 2026 -0700

    Fall back to raw-value REGEXP_LIKE evaluator when no dict-consuming index is available (#18599)
---
 .../predicate/PredicateEvaluatorProvider.java      |  32 +++--
 .../org/apache/pinot/core/plan/FilterPlanNode.java |  28 +----
 .../apache/pinot/core/plan/FilterPlanNodeTest.java | 138 +++++++++++++++++++++
 3 files changed, 166 insertions(+), 32 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
index 9a4662d99bb..822a61b05c9 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
@@ -44,26 +44,28 @@ public class PredicateEvaluatorProvider {
   /// Builds a [PredicateEvaluator] for a leaf filter on the column backed by 
`dataSource`. The dictionary is derived
   /// via [#getDictionaryUsableForFiltering], which keeps it only when a 
dict-consuming filter operator (inverted /
   /// exact range) will actually run for this predicate type on the column's 
forward-index encoding. The data type is
-  /// taken from the data-source metadata.
+  /// taken from the data-source metadata. For REGEXP_LIKE, the FST/IFST text 
index (when present) is consulted here
+  /// — the upgrade happens only when the dictionary is usable, so no 
evaluator is built and discarded.
   public static PredicateEvaluator getPredicateEvaluator(Predicate predicate, 
DataSource dataSource,
       QueryContext queryContext) {
     Dictionary dictionary = getDictionaryUsableForFiltering(dataSource, 
queryContext, predicate);
     DataType dataType = dataSource.getDataSourceMetadata().getDataType();
-    return buildEvaluator(predicate, dictionary, dataType, queryContext);
+    return buildEvaluator(predicate, dictionary, dataType, queryContext, 
dataSource);
   }
 
   /// Builds a [PredicateEvaluator] when the value source and `dictionary` are 
already in sync by construction: when
   /// `dictionary` is non-null the source produces dict ids decodable by that 
dictionary; when `dictionary` is null
   /// the source produces raw values. No gating logic runs — the dictionary 
(if any) is taken as-is, so the caller is
-  /// responsible for the match.
+  /// responsible for the match. FST/IFST evaluators are not considered here 
since this overload has no `DataSource`
+  /// to read text indexes from.
   // TODO: Always pass in query context
   public static PredicateEvaluator getPredicateEvaluator(Predicate predicate, 
@Nullable Dictionary dictionary,
       DataType dataType, @Nullable QueryContext queryContext) {
-    return buildEvaluator(predicate, dictionary, dataType, queryContext);
+    return buildEvaluator(predicate, dictionary, dataType, queryContext, null);
   }
 
   private static PredicateEvaluator buildEvaluator(Predicate predicate, 
@Nullable Dictionary dictionary,
-      DataType dataType, @Nullable QueryContext queryContext) {
+      DataType dataType, @Nullable QueryContext queryContext, @Nullable 
DataSource dataSource) {
     try {
       if (dictionary != null) {
         // dictionary based predicate evaluators
@@ -83,9 +85,23 @@ public class PredicateEvaluatorProvider {
           case RANGE:
             return 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator((RangePredicate) 
predicate, dictionary,
                 dataType);
-          case REGEXP_LIKE:
-            return 
RegexpLikePredicateEvaluatorFactory.newDictionaryBasedEvaluator((RegexpLikePredicate)
 predicate,
-                dictionary, dataType, queryContext);
+          case REGEXP_LIKE: {
+            // Prefer FST/IFST text index when present on the data source; 
otherwise fall back to the generic
+            // dict-based evaluator (dict-id scan or eager dict iteration).
+            RegexpLikePredicate regexpLike = (RegexpLikePredicate) predicate;
+            if (dataSource != null) {
+              if (regexpLike.isCaseInsensitive() && dataSource.getIFSTIndex() 
!= null) {
+                return 
IFSTBasedRegexpPredicateEvaluatorFactory.newIFSTBasedEvaluator(regexpLike,
+                    dataSource.getIFSTIndex(), dictionary);
+              }
+              if (!regexpLike.isCaseInsensitive() && dataSource.getFSTIndex() 
!= null) {
+                return 
FSTBasedRegexpPredicateEvaluatorFactory.newFSTBasedEvaluator(regexpLike,
+                    dataSource.getFSTIndex(), dictionary);
+              }
+            }
+            return 
RegexpLikePredicateEvaluatorFactory.newDictionaryBasedEvaluator(regexpLike, 
dictionary, dataType,
+                queryContext);
+          }
           default:
             throw new UnsupportedOperationException("Unsupported predicate 
type: " + predicate.getType());
         }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
index 9074ff6323f..b69d77bcd2f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
@@ -30,7 +30,6 @@ import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.FunctionContext;
 import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate;
 import org.apache.pinot.common.request.context.predicate.Predicate;
-import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
 import org.apache.pinot.common.request.context.predicate.TextMatchPredicate;
 import 
org.apache.pinot.common.request.context.predicate.VectorSimilarityPredicate;
 import 
org.apache.pinot.common.request.context.predicate.VectorSimilarityRadiusPredicate;
@@ -54,8 +53,6 @@ import org.apache.pinot.core.operator.filter.VectorSearchMode;
 import org.apache.pinot.core.operator.filter.VectorSearchParams;
 import org.apache.pinot.core.operator.filter.VectorSearchStrategy;
 import org.apache.pinot.core.operator.filter.VectorSimilarityFilterOperator;
-import 
org.apache.pinot.core.operator.filter.predicate.FSTBasedRegexpPredicateEvaluatorFactory;
-import 
org.apache.pinot.core.operator.filter.predicate.IFSTBasedRegexpPredicateEvaluatorFactory;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import 
org.apache.pinot.core.operator.filter.predicate.PredicateEvaluatorProvider;
 import org.apache.pinot.core.operator.transform.function.ItemTransformFunction;
@@ -306,27 +303,10 @@ public class FilterPlanNode implements PlanNode {
                 return new TextMatchFilterOperator(textIndexReader, 
(TextMatchPredicate) predicate, numDocs);
               }
             case REGEXP_LIKE:
-              // Check if case-insensitive flag is present
-              RegexpLikePredicate regexpLikePredicate = (RegexpLikePredicate) 
predicate;
-              boolean caseInsensitive = 
regexpLikePredicate.isCaseInsensitive();
-              if (caseInsensitive) {
-                if (dataSource.getIFSTIndex() != null) {
-                  predicateEvaluator =
-                      
IFSTBasedRegexpPredicateEvaluatorFactory.newIFSTBasedEvaluator(regexpLikePredicate,
-                          dataSource.getIFSTIndex(), 
dataSource.getDictionary());
-                } else {
-                  predicateEvaluator =
-                      
PredicateEvaluatorProvider.getPredicateEvaluator(predicate, dataSource, 
_queryContext);
-                }
-              } else {
-                if (dataSource.getFSTIndex() != null) {
-                  predicateEvaluator = 
FSTBasedRegexpPredicateEvaluatorFactory.newFSTBasedEvaluator(regexpLikePredicate,
-                      dataSource.getFSTIndex(), dataSource.getDictionary());
-                } else {
-                  predicateEvaluator =
-                      
PredicateEvaluatorProvider.getPredicateEvaluator(predicate, dataSource, 
_queryContext);
-                }
-              }
+              // PredicateEvaluatorProvider handles FST/IFST upgrade 
internally when the dictionary is usable for
+              // filtering and a matching text index exists on the data source.
+              predicateEvaluator =
+                  PredicateEvaluatorProvider.getPredicateEvaluator(predicate, 
dataSource, _queryContext);
               _predicateEvaluators.add(Pair.of(predicate, predicateEvaluator));
               return FilterOperatorUtils.getLeafFilterOperator(_queryContext, 
predicateEvaluator, dataSource, numDocs);
             case JSON_MATCH:
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/plan/FilterPlanNodeTest.java b/pinot-core/src/test/java/org/apache/pinot/core/plan/FilterPlanNodeTest.java
index 689db9e2fef..3faced128bb 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/plan/FilterPlanNodeTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/plan/FilterPlanNodeTest.java
@@ -20,24 +20,44 @@ package org.apache.pinot.core.plan;
 
 import java.lang.reflect.Method;
 import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.FilterContext;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
 import org.apache.pinot.core.common.BlockDocIdIterator;
 import org.apache.pinot.core.common.BlockDocIdSet;
 import org.apache.pinot.core.operator.blocks.FilterBlock;
 import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import 
org.apache.pinot.core.operator.filter.predicate.BaseDictIdBasedRegexpLikePredicateEvaluator;
+import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import org.apache.pinot.core.query.request.context.QueryContext;
 import org.apache.pinot.segment.local.upsert.UpsertUtils;
 import org.apache.pinot.segment.spi.Constants;
 import org.apache.pinot.segment.spi.IndexSegment;
 import org.apache.pinot.segment.spi.SegmentContext;
 import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
 import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig;
 import 
org.apache.pinot.segment.spi.index.mutable.ThreadSafeMutableRoaringBitmap;
+import org.apache.pinot.segment.spi.index.reader.Dictionary;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
+import org.apache.pinot.segment.spi.index.reader.TextIndexReader;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.mockito.Mockito;
 import org.mockito.stubbing.Answer;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.testng.annotations.Test;
 
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
 
 
 public class FilterPlanNodeTest {
@@ -133,4 +153,122 @@ public class FilterPlanNodeTest {
     }
     return numDocsFiltered;
   }
+
+  @Test // Case-insensitive REGEXP_LIKE with an IFST index and an inverted index: expects a dictionary-based evaluator.
+  public void regexpLikeUsesIFSTEvaluatorWhenIFSTAndInvertedAvailable()
+      throws Exception {
+    PredicateEvaluator evaluator = runRegexpLikeAndGetEvaluator(
+        true, true, false, true, false, true); // caseInsensitive, hasIFST, hasFST, hasDictionary, forwardDictEncoded, hasInverted
+    assertTrue(evaluator.isDictionaryBased());
+    assertTrue(evaluator instanceof 
BaseDictIdBasedRegexpLikePredicateEvaluator);
+  }
+
+  @Test // Case-insensitive REGEXP_LIKE with an IFST index but neither inverted index nor dict-encoded forward index: expects a non-dictionary evaluator.
+  public void regexpLikeFallsBackToRawWhenIFSTPresentButNoDictConsumer()
+      throws Exception {
+    PredicateEvaluator evaluator = runRegexpLikeAndGetEvaluator(
+        true, true, false, true, false, false); // caseInsensitive, hasIFST, hasFST, hasDictionary, forwardDictEncoded, hasInverted
+    assertFalse(evaluator.isDictionaryBased());
+  }
+
+  @Test // Case-sensitive REGEXP_LIKE with an FST index and an inverted index: expects a dictionary-based evaluator.
+  public void regexpLikeUsesFSTEvaluatorWhenFSTAndInvertedAvailable()
+      throws Exception {
+    PredicateEvaluator evaluator = runRegexpLikeAndGetEvaluator(
+        false, false, true, true, false, true); // caseInsensitive, hasIFST, hasFST, hasDictionary, forwardDictEncoded, hasInverted
+    assertTrue(evaluator.isDictionaryBased());
+    assertTrue(evaluator instanceof 
BaseDictIdBasedRegexpLikePredicateEvaluator);
+  }
+
+  @Test // Case-sensitive REGEXP_LIKE with an FST index but neither inverted index nor dict-encoded forward index: expects a non-dictionary evaluator.
+  public void regexpLikeFallsBackToRawWhenFSTPresentButNoDictConsumer()
+      throws Exception {
+    PredicateEvaluator evaluator = runRegexpLikeAndGetEvaluator(
+        false, false, true, true, false, false); // caseInsensitive, hasIFST, hasFST, hasDictionary, forwardDictEncoded, hasInverted
+    assertFalse(evaluator.isDictionaryBased());
+  }
+
+  @Test // Case-insensitive REGEXP_LIKE with an IFST index and a dict-encoded forward index (no inverted index): expects a dictionary-based evaluator.
+  public void regexpLikeUsesIFSTEvaluatorWhenIFSTAndDictEncodedForward()
+      throws Exception {
+    PredicateEvaluator evaluator = runRegexpLikeAndGetEvaluator(
+        true, true, false, true, true, false); // caseInsensitive, hasIFST, hasFST, hasDictionary, forwardDictEncoded, hasInverted
+    assertTrue(evaluator.isDictionaryBased());
+  }
+
+  private PredicateEvaluator runRegexpLikeAndGetEvaluator(boolean 
caseInsensitive, boolean hasIFST, boolean hasFST,
+      boolean hasDictionary, boolean forwardDictEncoded, boolean hasInverted)
+      throws Exception { // Runs FilterPlanNode for one REGEXP_LIKE predicate over a mocked segment and returns the first evaluator it recorded.
+    String column = "col";
+    DataSource dataSource =
+        mockStringDataSource(column, hasIFST, hasFST, hasDictionary, 
forwardDictEncoded, hasInverted);
+    RegexpLikePredicate predicate = caseInsensitive
+        ? new RegexpLikePredicate(ExpressionContext.forIdentifier(column), 
"pat", "i") // the "i" argument is passed only on the case-insensitive path
+        : new RegexpLikePredicate(ExpressionContext.forIdentifier(column), 
"pat");
+    FilterContext filterContext = FilterContext.forPredicate(predicate);
+
+    IndexSegment segment = mock(IndexSegment.class);
+    SegmentMetadata segmentMetadata = mock(SegmentMetadata.class);
+    when(segmentMetadata.getTotalDocs()).thenReturn(1);
+    when(segment.getSegmentMetadata()).thenReturn(segmentMetadata);
+    when(segment.getDataSource(Mockito.eq(column), 
Mockito.any())).thenReturn(dataSource); // the mocked data source is served for the test column only
+
+    QueryContext queryContext = mock(QueryContext.class);
+    when(queryContext.getFilter()).thenReturn(filterContext);
+    when(queryContext.isIndexUseAllowed(Mockito.any(DataSource.class), 
Mockito.any(FieldConfig.IndexType.class)))
+        .thenReturn(true); // every index type is reported as allowed for any data source
+
+    SegmentContext segmentContext = new SegmentContext(segment);
+
+    FilterPlanNode planNode = new FilterPlanNode(segmentContext, queryContext);
+    try {
+      planNode.run();
+    } catch (Exception ignored) { // NOTE(review): any failure in run() is swallowed without explanation; presumably operator construction may fail on the mocks after the evaluator is recorded -- confirm and document why
+    }
+
+    Pair<Predicate, PredicateEvaluator> pair = 
planNode.getPredicateEvaluators().get(0); // first recorded (predicate, evaluator) pair
+    return pair.getRight();
+  }
+
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private static DataSource mockStringDataSource(String column, boolean 
hasIFST, boolean hasFST,
+      boolean hasDictionary, boolean forwardDictEncoded, boolean hasInverted) { // Builds a Mockito DataSource for a STRING column whose dictionary and indexes are toggled by the flags.
+    DataSource dataSource = Mockito.mock(DataSource.class);
+    DataSourceMetadata metadata = Mockito.mock(DataSourceMetadata.class);
+    when(metadata.getDataType()).thenReturn(DataType.STRING);
+    when(metadata.isSorted()).thenReturn(false); // column is always reported as unsorted
+    when(metadata.getFieldSpec()).thenReturn(new DimensionFieldSpec(column, 
DataType.STRING, true));
+    when(dataSource.getDataSourceMetadata()).thenReturn(metadata);
+    when(dataSource.getColumnName()).thenReturn(column);
+
+    ForwardIndexReader forwardIndex = Mockito.mock(ForwardIndexReader.class);
+    when(forwardIndex.isDictionaryEncoded()).thenReturn(forwardDictEncoded); // forward-index encoding is driven by the flag
+    when(forwardIndex.getStoredType()).thenReturn(DataType.STRING);
+    when(dataSource.getForwardIndex()).thenReturn(forwardIndex);
+
+    if (hasDictionary) {
+      Dictionary dictionary = Mockito.mock(Dictionary.class);
+      when(dictionary.length()).thenReturn(0); // mocked dictionary reports zero entries
+      when(dataSource.getDictionary()).thenReturn(dictionary);
+    } else {
+      when(dataSource.getDictionary()).thenReturn(null);
+    }
+
+    InvertedIndexReader invertedReader = hasInverted ? 
Mockito.mock(InvertedIndexReader.class) : null; // a null reader stands for "index absent"
+    TextIndexReader ifstReader = hasIFST ? mockTextIndexReader() : null;
+    TextIndexReader fstReader = hasFST ? mockTextIndexReader() : null;
+    when(dataSource.getInvertedIndex()).thenReturn(invertedReader);
+    when(dataSource.getRangeIndex()).thenReturn(null); // no range index in any scenario
+    when(dataSource.getIFSTIndex()).thenReturn(ifstReader);
+    when(dataSource.getFSTIndex()).thenReturn(fstReader);
+
+    return dataSource;
+  }
+
+  private static TextIndexReader mockTextIndexReader() { // Builds a Mockito TextIndexReader standing in for an FST/IFST index reader.
+    TextIndexReader reader = Mockito.mock(TextIndexReader.class);
+    ImmutableRoaringBitmap emptyBitmap = ImmutableRoaringBitmap.bitmapOf(); // bitmap built from no values
+    when(reader.getDictIds(Mockito.anyString())).thenReturn(emptyBitmap); // any string lookup yields that same bitmap
+    return reader;
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to