This is an automated email from the ASF dual-hosted git repository.

saurabhd336 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c7cc821698 Enhance json index to support regexp and range predicate evaluation (#12568)
c7cc821698 is described below

commit c7cc821698927c81b809cc0a431bbb039a832cbb
Author: Saurabh Dubey <[email protected]>
AuthorDate: Fri Mar 8 10:13:23 2024 +0530

    Enhance json index to support regexp and range predicate evaluation (#12568)
    
    * Enhance json index to support regexp and range predicate evaluation
    
    * Move to TreeMap for mutable json index
    
    * Review comments
    
    * Simplify subMap call
    
    * Lint
    
    * Review comments
    
    ---------
    
    Co-authored-by: Saurabh Dubey <[email protected]>
---
 .../request/context/RequestContextUtils.java       |  30 +++---
 .../request/context/predicate/RangePredicate.java  |  10 +-
 ...ngeOfflineDictionaryPredicateEvaluatorTest.java |   2 +-
 .../request/context/predicate/PredicateTest.java   |   4 +-
 .../BrokerRequestToQueryContextConverterTest.java  |   4 +-
 .../realtime/impl/json/MutableJsonIndexImpl.java   | 106 +++++++++++++++++++--
 .../readers/json/ImmutableJsonIndexReader.java     |  88 ++++++++++++++++-
 .../segment/local/segment/index/JsonIndexTest.java |  41 +++++++-
 .../spi/index/creator/JsonIndexCreator.java        |   1 +
 .../java/org/apache/pinot/spi/data/FieldSpec.java  |  38 +++++++-
 10 files changed, 284 insertions(+), 40 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
index 958a20da68..a7ab0f3279 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
@@ -209,23 +209,23 @@ public class RequestContextUtils {
       case GREATER_THAN:
         return FilterContext.forPredicate(
             new RangePredicate(getExpression(operands.get(0)), false, 
getStringValue(operands.get(1)), false,
-                RangePredicate.UNBOUNDED));
+                RangePredicate.UNBOUNDED, new 
LiteralContext(operands.get(1).getLiteral()).getType()));
       case GREATER_THAN_OR_EQUAL:
         return FilterContext.forPredicate(
             new RangePredicate(getExpression(operands.get(0)), true, 
getStringValue(operands.get(1)), false,
-                RangePredicate.UNBOUNDED));
+                RangePredicate.UNBOUNDED, new 
LiteralContext(operands.get(1).getLiteral()).getType()));
       case LESS_THAN:
         return FilterContext.forPredicate(
             new RangePredicate(getExpression(operands.get(0)), false, 
RangePredicate.UNBOUNDED, false,
-                getStringValue(operands.get(1))));
+                getStringValue(operands.get(1)), new 
LiteralContext(operands.get(1).getLiteral()).getType()));
       case LESS_THAN_OR_EQUAL:
         return FilterContext.forPredicate(
             new RangePredicate(getExpression(operands.get(0)), false, 
RangePredicate.UNBOUNDED, true,
-                getStringValue(operands.get(1))));
+                getStringValue(operands.get(1)), new 
LiteralContext(operands.get(1).getLiteral()).getType()));
       case BETWEEN:
         return FilterContext.forPredicate(
             new RangePredicate(getExpression(operands.get(0)), true, 
getStringValue(operands.get(1)), true,
-                getStringValue(operands.get(2))));
+                getStringValue(operands.get(2)), new 
LiteralContext(operands.get(1).getLiteral()).getType()));
       case RANGE:
         return FilterContext.forPredicate(
             new RangePredicate(getExpression(operands.get(0)), 
getStringValue(operands.get(1))));
@@ -400,22 +400,24 @@ public class RequestContextUtils {
       }
       case GREATER_THAN:
         return FilterContext.forPredicate(
-            new RangePredicate(operands.get(0), false, 
getStringValue(operands.get(1)), false,
-                RangePredicate.UNBOUNDED));
+            new RangePredicate(operands.get(0), false, 
getStringValue(operands.get(1)), false, RangePredicate.UNBOUNDED,
+                operands.get(1).getLiteral().getType()));
       case GREATER_THAN_OR_EQUAL:
         return FilterContext.forPredicate(
-            new RangePredicate(operands.get(0), true, 
getStringValue(operands.get(1)), false,
-                RangePredicate.UNBOUNDED));
+            new RangePredicate(operands.get(0), true, 
getStringValue(operands.get(1)), false, RangePredicate.UNBOUNDED,
+                operands.get(1).getLiteral().getType()));
       case LESS_THAN:
-        return FilterContext.forPredicate(new RangePredicate(operands.get(0), 
false, RangePredicate.UNBOUNDED, false,
-            getStringValue(operands.get(1))));
+        return FilterContext.forPredicate(
+            new RangePredicate(operands.get(0), false, 
RangePredicate.UNBOUNDED, false, getStringValue(operands.get(1)),
+                operands.get(1).getLiteral().getType()));
       case LESS_THAN_OR_EQUAL:
-        return FilterContext.forPredicate(new RangePredicate(operands.get(0), 
false, RangePredicate.UNBOUNDED, true,
-            getStringValue(operands.get(1))));
+        return FilterContext.forPredicate(
+            new RangePredicate(operands.get(0), false, 
RangePredicate.UNBOUNDED, true, getStringValue(operands.get(1)),
+                operands.get(1).getLiteral().getType()));
       case BETWEEN:
         return FilterContext.forPredicate(
             new RangePredicate(operands.get(0), true, 
getStringValue(operands.get(1)), true,
-                getStringValue(operands.get(2))));
+                getStringValue(operands.get(2)), 
operands.get(1).getLiteral().getType()));
       case RANGE:
         return FilterContext.forPredicate(new RangePredicate(operands.get(0), 
getStringValue(operands.get(1))));
       case REGEXP_LIKE:
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java b/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
index e6b8fb59bc..61b5ebbe91 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
@@ -21,6 +21,7 @@ package org.apache.pinot.common.request.context.predicate;
 import java.util.Objects;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.utils.CommonConstants.Query.Range;
 
 
@@ -43,6 +44,7 @@ public class RangePredicate extends BasePredicate {
   private final String _lowerBound;
   private final boolean _upperInclusive;
   private final String _upperBound;
+  private final FieldSpec.DataType _rangeDataType;
 
   /**
    * The range is formatted as 5 parts:
@@ -67,15 +69,17 @@ public class RangePredicate extends BasePredicate {
     int upperLength = upper.length();
     _upperInclusive = upper.charAt(upperLength - 1) == UPPER_INCLUSIVE;
     _upperBound = upper.substring(0, upperLength - 1);
+    _rangeDataType = FieldSpec.DataType.UNKNOWN;
   }
 
   public RangePredicate(ExpressionContext lhs, boolean lowerInclusive, String 
lowerBound, boolean upperInclusive,
-      String upperBound) {
+      String upperBound, FieldSpec.DataType rangeDataType) {
     super(lhs);
     _lowerInclusive = lowerInclusive;
     _lowerBound = lowerBound;
     _upperInclusive = upperInclusive;
     _upperBound = upperBound;
+    _rangeDataType = rangeDataType;
   }
 
   @Override
@@ -99,6 +103,10 @@ public class RangePredicate extends BasePredicate {
     return _upperBound;
   }
 
+  public FieldSpec.DataType getRangeDataType() {
+    return _rangeDataType;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (this == o) {
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
index bb5e6e9100..e1c8a501ce 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
@@ -270,6 +270,6 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
     if (upper == DICT_LEN - 1 && inclUpper) {
       upperStr = "*";
     }
-    return new RangePredicate(COLUMN_EXPRESSION, inclLower, lowerStr, 
inclUpper, upperStr);
+    return new RangePredicate(COLUMN_EXPRESSION, inclLower, lowerStr, 
inclUpper, upperStr, DataType.STRING);
   }
 }
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
index 6b52d41546..9a1ee8a07e 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
@@ -25,6 +25,7 @@ import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
 import org.apache.pinot.common.request.context.predicate.Predicate;
 import org.apache.pinot.common.request.context.predicate.RangePredicate;
+import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.testng.annotations.Test;
 
@@ -74,7 +75,8 @@ public class PredicateTest {
 
     // Non-standard RangePredicate (merged ranges)
     RangePredicate rangePredicate =
-        new RangePredicate(ExpressionContext.forIdentifier("foo"), true, 
"123", false, "456");
+        new RangePredicate(ExpressionContext.forIdentifier("foo"), true, 
"123", false, "456",
+            FieldSpec.DataType.STRING);
     String predicateExpression = rangePredicate.toString();
     assertEquals(predicateExpression, "(foo >= '123' AND foo < '456')");
     Expression thriftExpression = 
CalciteSqlParser.compileToExpression(predicateExpression);
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
index efeb7ac071..f86c829171 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
@@ -295,14 +295,14 @@ public class BrokerRequestToQueryContextConverterTest {
       List<FilterContext> children = filter.getChildren();
       assertEquals(children.size(), 2);
       assertEquals(children.get(0), FilterContext.forPredicate(
-          new RangePredicate(ExpressionContext.forIdentifier("foo"), false, 
"15", false, "*")));
+          new RangePredicate(ExpressionContext.forIdentifier("foo"), false, 
"15", false, "*", FieldSpec.DataType.INT)));
       FilterContext orFilter = children.get(1);
       assertEquals(orFilter.getType(), FilterContext.Type.OR);
       assertEquals(orFilter.getChildren().size(), 2);
       assertEquals(orFilter.getChildren().get(0), 
FilterContext.forPredicate(new RangePredicate(
           ExpressionContext.forFunction(new 
FunctionContext(FunctionContext.Type.TRANSFORM, "div",
               Arrays.asList(ExpressionContext.forIdentifier("bar"), 
ExpressionContext.forIdentifier("foo")))), true,
-          "10", true, "20")));
+          "10", true, "20", FieldSpec.DataType.INT)));
       assertEquals(orFilter.getChildren().get(1),
           FilterContext.forPredicate(new 
TextMatchPredicate(ExpressionContext.forIdentifier("foobar"), "potato")));
       assertEquals(filter.toString(),
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
index 7e632a19b2..78925a55e2 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
@@ -26,7 +26,9 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.TreeMap;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.regex.Pattern;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -35,10 +37,12 @@ import 
org.apache.pinot.common.request.context.predicate.InPredicate;
 import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
 import org.apache.pinot.common.request.context.predicate.NotInPredicate;
 import org.apache.pinot.common.request.context.predicate.Predicate;
-import 
org.apache.pinot.segment.local.segment.creator.impl.inv.json.BaseJsonIndexCreator;
+import org.apache.pinot.common.request.context.predicate.RangePredicate;
+import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
 import org.apache.pinot.segment.spi.index.creator.JsonIndexCreator;
 import org.apache.pinot.segment.spi.index.mutable.MutableJsonIndex;
 import org.apache.pinot.spi.config.table.JsonIndexConfig;
+import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.exception.BadQueryRequestException;
 import org.apache.pinot.spi.utils.JsonUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
@@ -53,7 +57,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
  */
 public class MutableJsonIndexImpl implements MutableJsonIndex {
   private final JsonIndexConfig _jsonIndexConfig;
-  private final Map<String, RoaringBitmap> _postingListMap;
+  private final TreeMap<String, RoaringBitmap> _postingListMap;
   private final IntList _docIdMapping;
   private final ReentrantReadWriteLock.ReadLock _readLock;
   private final ReentrantReadWriteLock.WriteLock _writeLock;
@@ -63,7 +67,7 @@ public class MutableJsonIndexImpl implements MutableJsonIndex 
{
 
   public MutableJsonIndexImpl(JsonIndexConfig jsonIndexConfig) {
     _jsonIndexConfig = jsonIndexConfig;
-    _postingListMap = new HashMap<>();
+    _postingListMap = new TreeMap<>();
     _docIdMapping = new IntArrayList();
 
     ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock();
@@ -230,7 +234,7 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
       if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
         // "[0]"=1 -> ".$index"='0' && "."='1'
         // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
-        String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY + 
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+        String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
         RoaringBitmap docIds = _postingListMap.get(searchKey);
         if (docIds != null) {
           if (matchingDocIds == null) {
@@ -250,7 +254,7 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
     if (predicateType == Predicate.Type.EQ || predicateType == 
Predicate.Type.NOT_EQ) {
       String value = predicateType == Predicate.Type.EQ ? ((EqPredicate) 
predicate).getValue()
           : ((NotEqPredicate) predicate).getValue();
-      String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + 
value;
+      String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR + value;
       RoaringBitmap matchingDocIdsForKeyValuePair = 
_postingListMap.get(keyValuePair);
       if (matchingDocIdsForKeyValuePair != null) {
         if (matchingDocIds == null) {
@@ -267,7 +271,7 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
           : ((NotInPredicate) predicate).getValues();
       RoaringBitmap matchingDocIdsForKeyValuePairs = new RoaringBitmap();
       for (String value : values) {
-        String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + 
value;
+        String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR + 
value;
         RoaringBitmap matchingDocIdsForKeyValuePair = 
_postingListMap.get(keyValuePair);
         if (matchingDocIdsForKeyValuePair != null) {
           matchingDocIdsForKeyValuePairs.or(matchingDocIdsForKeyValuePair);
@@ -291,6 +295,85 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
       } else {
         return new RoaringBitmap();
       }
+    } else if (predicateType == Predicate.Type.REGEXP_LIKE) {
+      Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
+      if (subMap.isEmpty()) {
+        return new RoaringBitmap();
+      }
+      Pattern pattern = ((RegexpLikePredicate) predicate).getPattern();
+      RoaringBitmap result = null;
+
+      for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
+        if (!pattern.matcher(entry.getKey().substring(key.length() + 
1)).matches()) {
+          continue;
+        }
+        if (result == null) {
+          result = entry.getValue().clone();
+        } else {
+          result.or(entry.getValue());
+        }
+      }
+
+      if (result == null) {
+        return new RoaringBitmap();
+      } else {
+        if (matchingDocIds == null) {
+          return result;
+        } else {
+          matchingDocIds.and(result);
+          return matchingDocIds;
+        }
+      }
+    } else if (predicateType == Predicate.Type.RANGE) {
+      Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
+      if (subMap.isEmpty()) {
+        return new RoaringBitmap();
+      }
+      RoaringBitmap result = null;
+
+      RangePredicate rangePredicate = (RangePredicate) predicate;
+      FieldSpec.DataType rangeDataType = rangePredicate.getRangeDataType();
+      // Simplify to only support numeric and string types
+      if (rangeDataType.isNumeric()) {
+        rangeDataType = FieldSpec.DataType.DOUBLE;
+      } else {
+        rangeDataType = FieldSpec.DataType.STRING;
+      }
+
+      boolean lowerUnbounded = 
rangePredicate.getLowerBound().equals(RangePredicate.UNBOUNDED);
+      boolean upperUnbounded = 
rangePredicate.getUpperBound().equals(RangePredicate.UNBOUNDED);
+      boolean lowerInclusive = lowerUnbounded || 
rangePredicate.isLowerInclusive();
+      boolean upperInclusive = upperUnbounded || 
rangePredicate.isUpperInclusive();
+      Object lowerBound = lowerUnbounded ? null : 
rangeDataType.convert(rangePredicate.getLowerBound());
+      Object upperBound = upperUnbounded ? null : 
rangeDataType.convert(rangePredicate.getUpperBound());
+
+      for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
+        Object valueObj = 
rangeDataType.convert(entry.getKey().substring(key.length() + 1));
+        boolean lowerCompareResult =
+            lowerUnbounded || (lowerInclusive ? 
rangeDataType.compare(valueObj, lowerBound) >= 0
+                : rangeDataType.compare(valueObj, lowerBound) > 0);
+        boolean upperCompareResult =
+            upperUnbounded || (upperInclusive ? 
rangeDataType.compare(valueObj, upperBound) <= 0
+                : rangeDataType.compare(valueObj, upperBound) < 0);
+        if (lowerCompareResult && upperCompareResult) {
+          if (result == null) {
+            result = entry.getValue().clone();
+          } else {
+            result.or(entry.getValue());
+          }
+        }
+      }
+
+      if (result == null) {
+        return new RoaringBitmap();
+      } else {
+        if (matchingDocIds == null) {
+          return result;
+        } else {
+          matchingDocIds.and(result);
+          return matchingDocIds;
+        }
+      }
     } else {
       throw new IllegalStateException("Unsupported json_match predicate type: 
" + predicate);
     }
@@ -301,10 +384,8 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
     Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
     _readLock.lock();
     try {
-      for (Map.Entry<String, RoaringBitmap> entry : 
_postingListMap.entrySet()) {
-        if (!entry.getKey().startsWith(key + 
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR)) {
-          continue;
-        }
+      Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
+      for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
         MutableRoaringBitmap flattenedDocIds = 
entry.getValue().toMutableRoaringBitmap();
         PeekableIntIterator it = flattenedDocIds.getIntIterator();
         MutableRoaringBitmap postingList = new MutableRoaringBitmap();
@@ -342,6 +423,11 @@ public class MutableJsonIndexImpl implements 
MutableJsonIndex {
     return values;
   }
 
+  private Map<String, RoaringBitmap> getMatchingKeysMap(String key) {
+    return _postingListMap.subMap(key + JsonIndexCreator.KEY_VALUE_SEPARATOR, 
false,
+        key + JsonIndexCreator.KEY_VALUE_SEPARATOR_NEXT_CHAR, false);
+  }
+
   @Override
   public void close() {
   }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
index ac4a24d56d..ee3dc5bcad 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
@@ -24,6 +24,7 @@ import java.nio.ByteOrder;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -32,11 +33,15 @@ import 
org.apache.pinot.common.request.context.predicate.InPredicate;
 import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
 import org.apache.pinot.common.request.context.predicate.NotInPredicate;
 import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.common.request.context.predicate.RangePredicate;
+import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
 import 
org.apache.pinot.segment.local.segment.creator.impl.inv.json.BaseJsonIndexCreator;
 import 
org.apache.pinot.segment.local.segment.index.readers.BitmapInvertedIndexReader;
 import org.apache.pinot.segment.local.segment.index.readers.StringDictionary;
+import org.apache.pinot.segment.spi.index.creator.JsonIndexCreator;
 import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
 import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.exception.BadQueryRequestException;
 import org.apache.pinot.spi.utils.JsonUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
@@ -192,7 +197,7 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
           // "[0]"=1 -> ".$index"='0' && "."='1'
           // ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
           String searchKey =
-              leftPart + JsonUtils.ARRAY_INDEX_KEY + 
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+              leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
           int dictId = _dictionary.indexOf(searchKey);
           if (dictId >= 0) {
             ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
@@ -232,7 +237,7 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
         if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
           // "foo[1].bar"='abc' -> "foo.$index"=1 && "foo.bar"='abc'
           String searchKey =
-              leftPart + JsonUtils.ARRAY_INDEX_KEY + 
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+              leftPart + JsonUtils.ARRAY_INDEX_KEY + 
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
           int dictId = _dictionary.indexOf(searchKey);
           if (dictId >= 0) {
             ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
@@ -254,7 +259,7 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
     if (predicateType == Predicate.Type.EQ || predicateType == 
Predicate.Type.NOT_EQ) {
       String value = predicateType == Predicate.Type.EQ ? ((EqPredicate) 
predicate).getValue()
           : ((NotEqPredicate) predicate).getValue();
-      String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + 
value;
+      String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR + value;
       int dictId = _dictionary.indexOf(keyValuePair);
       if (dictId >= 0) {
         ImmutableRoaringBitmap matchingDocIdsForKeyValuePair = 
_invertedIndex.getDocIds(dictId);
@@ -272,7 +277,7 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
           : ((NotInPredicate) predicate).getValues();
       MutableRoaringBitmap matchingDocIdsForKeyValuePairs = new 
MutableRoaringBitmap();
       for (String value : values) {
-        String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + 
value;
+        String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR + 
value;
         int dictId = _dictionary.indexOf(keyValuePair);
         if (dictId >= 0) {
           matchingDocIdsForKeyValuePairs.or(_invertedIndex.getDocIds(dictId));
@@ -297,6 +302,79 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
       } else {
         return new MutableRoaringBitmap();
       }
+    } else if (predicateType == Predicate.Type.REGEXP_LIKE) {
+      Pattern pattern = ((RegexpLikePredicate) predicate).getPattern();
+      int[] dictIds = getDictIdRangeForKey(key);
+
+      MutableRoaringBitmap result = null;
+      for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
+        String value = 
_dictionary.getStringValue(dictId).substring(key.length() + 1);
+        if (pattern.matcher(value).matches()) {
+          if (result == null) {
+            result = _invertedIndex.getDocIds(dictId).toMutableRoaringBitmap();
+          } else {
+            result.or(_invertedIndex.getDocIds(dictId));
+          }
+        }
+      }
+      if (result == null) {
+        return new MutableRoaringBitmap();
+      } else {
+        if (matchingDocIds == null) {
+          return result;
+        } else {
+          matchingDocIds.and(result);
+          return matchingDocIds;
+        }
+      }
+    } else if (predicateType == Predicate.Type.RANGE) {
+      RangePredicate rangePredicate = (RangePredicate) predicate;
+      FieldSpec.DataType rangeDataType = rangePredicate.getRangeDataType();
+      // Simplify to only support numeric and string types
+      if (rangeDataType.isNumeric()) {
+        rangeDataType = FieldSpec.DataType.DOUBLE;
+      } else {
+        rangeDataType = FieldSpec.DataType.STRING;
+      }
+
+      boolean lowerUnbounded = 
rangePredicate.getLowerBound().equals(RangePredicate.UNBOUNDED);
+      boolean upperUnbounded = 
rangePredicate.getUpperBound().equals(RangePredicate.UNBOUNDED);
+      boolean lowerInclusive = lowerUnbounded || 
rangePredicate.isLowerInclusive();
+      boolean upperInclusive = upperUnbounded || 
rangePredicate.isUpperInclusive();
+      Object lowerBound = lowerUnbounded ? null : 
rangeDataType.convert(rangePredicate.getLowerBound());
+      Object upperBound = upperUnbounded ? null : 
rangeDataType.convert(rangePredicate.getUpperBound());
+
+      int[] dictIds = getDictIdRangeForKey(key);
+      MutableRoaringBitmap result = null;
+      for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
+        String value = 
_dictionary.getStringValue(dictId).substring(key.length() + 1);
+        Object valueObj = rangeDataType.convert(value);
+        boolean lowerCompareResult =
+            lowerUnbounded || (lowerInclusive ? 
rangeDataType.compare(valueObj, lowerBound) >= 0
+                : rangeDataType.compare(valueObj, lowerBound) > 0);
+        boolean upperCompareResult =
+            upperUnbounded || (upperInclusive ? 
rangeDataType.compare(valueObj, upperBound) <= 0
+                : rangeDataType.compare(valueObj, upperBound) < 0);
+
+        if (lowerCompareResult && upperCompareResult) {
+          if (result == null) {
+            result = _invertedIndex.getDocIds(dictId).toMutableRoaringBitmap();
+          } else {
+            result.or(_invertedIndex.getDocIds(dictId));
+          }
+        }
+      }
+
+      if (result == null) {
+        return new MutableRoaringBitmap();
+      } else {
+        if (matchingDocIds == null) {
+          return result;
+        } else {
+          matchingDocIds.and(result);
+          return matchingDocIds;
+        }
+      }
     } else {
       throw new IllegalStateException("Unsupported json_match predicate type: 
" + predicate);
     }
@@ -359,7 +437,7 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
     if (indexOfMin == -1) {
       return new int[]{-1, -1}; // if key does not exist, immediately return
     }
-    int indexOfMax = _dictionary.insertionIndexOf(key + "\u0001");
+    int indexOfMax = _dictionary.insertionIndexOf(key + 
JsonIndexCreator.KEY_VALUE_SEPARATOR_NEXT_CHAR);
 
     int minDictId = indexOfMin + 1; // skip the index of the key only
     int maxDictId = -1 * indexOfMax - 1; // undo the binary search
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
index 461f8eb93e..63aa83b0e9 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
@@ -78,16 +78,16 @@ public class JsonIndexTest {
     // @formatter: off
     // CHECKSTYLE:OFF
     String[] records = new String[]{
-        "{" + "\"name\":\"adam\"," + "\"age\":20," + "\"addresses\":["
+        "{" + "\"name\":\"adam\"," + "\"age\":20," + "\"score\":1.25," + 
"\"addresses\":["
             + "   {\"street\":\"street-00\",\"country\":\"us\"}," + "   
{\"street\":\"street-01\",\"country\":\"us\"},"
             + "   {\"street\":\"street-02\",\"country\":\"ca\"}]," + 
"\"skills\":[\"english\",\"programming\"]" + "}",
-        "{" + "\"name\":\"bob\"," + "\"age\":25," + "\"addresses\":["
+        "{" + "\"name\":\"bob\"," + "\"age\":25," + "\"score\":1.94," + 
"\"addresses\":["
             + "   {\"street\":\"street-10\",\"country\":\"ca\"}," + "   
{\"street\":\"street-11\",\"country\":\"us\"},"
             + "   {\"street\":\"street-12\",\"country\":\"in\"}]," + 
"\"skills\":[]" + "}",
-        "{" + "\"name\":\"charles\"," + "\"age\":30," + "\"addresses\":["
+        "{" + "\"name\":\"charles\"," + "\"age\":30," + "\"score\":0.90,"  + 
"\"addresses\":["
             + "   {\"street\":\"street-20\",\"country\":\"jp\"}," + "   
{\"street\":\"street-21\",\"country\":\"kr\"},"
             + "   {\"street\":\"street-22\",\"country\":\"cn\"}]," + 
"\"skills\":[\"japanese\",\"korean\",\"chinese\"]"
-            + "}", "{" + "\"name\":\"david\"," + "\"age\":35," + 
"\"addresses\":["
+            + "}", "{" + "\"name\":\"david\"," + "\"age\":35," + 
"\"score\":0.9999,"  + "\"addresses\":["
         + "   
{\"street\":\"street-30\",\"country\":\"ca\",\"types\":[\"home\",\"office\"]},"
         + "   {\"street\":\"street-31\",\"country\":\"ca\"}," + "   
{\"street\":\"street-32\",\"country\":\"ca\"}],"
         + "\"skills\":null" + "}"
@@ -120,6 +120,39 @@ public class JsonIndexTest {
         matchingDocIds = getMatchingDocIds(indexReader, 
"\"addresses[*].street\" = 'street-21'");
         Assert.assertEquals(matchingDocIds.toArray(), new int[]{2});
 
+        matchingDocIds = getMatchingDocIds(indexReader, 
"REGEXP_LIKE(\"addresses[*].street\", 'street-2.*')");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{2});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"age\" > 25");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{2, 3});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"age\" >= 25");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{1, 2, 3});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"age\" < 25");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{0});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"age\" <= 25");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"name\" > 'adam'");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{1, 2, 3});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"name\" > 'a'");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1, 2, 3});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"score\" > 1");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"score\" > 1.0");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1});
+
+        matchingDocIds = getMatchingDocIds(indexReader, "\"score\" > 0.99");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1, 3});
+
+        matchingDocIds = getMatchingDocIds(indexReader, 
"REGEXP_LIKE(\"score\", '[0-1]\\.[6-9].*')");
+        Assert.assertEquals(matchingDocIds.toArray(), new int[]{1, 2, 3});
+
         matchingDocIds = getMatchingDocIds(indexReader, 
"\"addresses[*].street\" NOT IN ('street-10', 'street-22')");
         Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 3});
 
diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
index 64e42c7f35..a886111756 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
@@ -29,6 +29,7 @@ import org.apache.pinot.segment.spi.index.IndexCreator;
  */
 public interface JsonIndexCreator extends IndexCreator {
   char KEY_VALUE_SEPARATOR = '\0';
+  char KEY_VALUE_SEPARATOR_NEXT_CHAR = KEY_VALUE_SEPARATOR + 1;
 
   @Override
   default void add(@Nonnull Object value, int dictId)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index e1218452d3..d5f3f7e403 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -28,6 +28,7 @@ import java.util.HashMap;
 import java.util.Map;
 import javax.annotation.Nullable;
 import org.apache.pinot.spi.utils.BooleanUtils;
+import org.apache.pinot.spi.utils.ByteArray;
 import org.apache.pinot.spi.utils.BytesUtils;
 import org.apache.pinot.spi.utils.EqualityUtils;
 import org.apache.pinot.spi.utils.JsonUtils;
@@ -564,6 +565,39 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
       }
     }
 
+    /**
+     * Compares the given values of the data type.
+     *
+     * return 0 if the values are equal
+     * return a negative value if value1 is less than value2
+     * return a positive value if value1 is greater than value2
+     */
+    public int compare(Object value1, Object value2) {
+      switch (this) {
+        case INT:
+          return Integer.compare((int) value1, (int) value2);
+        case LONG:
+          return Long.compare((long) value1, (long) value2);
+        case FLOAT:
+          return Float.compare((float) value1, (float) value2);
+        case DOUBLE:
+          return Double.compare((double) value1, (double) value2);
+        case BIG_DECIMAL:
+          return ((BigDecimal) value1).compareTo((BigDecimal) value2);
+        case BOOLEAN:
+          return Boolean.compare((boolean) value1, (boolean) value2);
+        case TIMESTAMP:
+          return Long.compare((long) value1, (long) value2);
+        case STRING:
+        case JSON:
+          return ((String) value1).compareTo((String) value2);
+        case BYTES:
+          return ByteArray.compare((byte[]) value1, (byte[]) value2);
+        default:
+          throw new IllegalStateException();
+      }
+    }
+
     /**
      * Converts the given value of the data type to string.The input value for 
BYTES type should be byte[].
      */
@@ -635,8 +669,8 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
   public boolean isBackwardCompatibleWith(FieldSpec oldFieldSpec) {
 
     return EqualityUtils.isEqual(_name, oldFieldSpec._name)
-            && EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType)
-            && EqualityUtils.isEqual(_isSingleValueField, 
oldFieldSpec._isSingleValueField);
+        && EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType)
+        && EqualityUtils.isEqual(_isSingleValueField, 
oldFieldSpec._isSingleValueField);
   }
 
   public static class FieldSpecMetadata {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to