This is an automated email from the ASF dual-hosted git repository.
saurabhd336 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c7cc821698 Enhance json index to support regexp and range predicate
evaluation (#12568)
c7cc821698 is described below
commit c7cc821698927c81b809cc0a431bbb039a832cbb
Author: Saurabh Dubey <[email protected]>
AuthorDate: Fri Mar 8 10:13:23 2024 +0530
Enhance json index to support regexp and range predicate evaluation (#12568)
* Enhance json index to support regexp and range predicate evaluation
* Move to TreeMap for mutable json index
* Review comments
* Simplify subMap call
* Lint
* Review comments
---------
Co-authored-by: Saurabh Dubey <[email protected]>
---
.../request/context/RequestContextUtils.java | 30 +++---
.../request/context/predicate/RangePredicate.java | 10 +-
...ngeOfflineDictionaryPredicateEvaluatorTest.java | 2 +-
.../request/context/predicate/PredicateTest.java | 4 +-
.../BrokerRequestToQueryContextConverterTest.java | 4 +-
.../realtime/impl/json/MutableJsonIndexImpl.java | 106 +++++++++++++++++++--
.../readers/json/ImmutableJsonIndexReader.java | 88 ++++++++++++++++-
.../segment/local/segment/index/JsonIndexTest.java | 41 +++++++-
.../spi/index/creator/JsonIndexCreator.java | 1 +
.../java/org/apache/pinot/spi/data/FieldSpec.java | 38 +++++++-
10 files changed, 284 insertions(+), 40 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
b/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
index 958a20da68..a7ab0f3279 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java
@@ -209,23 +209,23 @@ public class RequestContextUtils {
case GREATER_THAN:
return FilterContext.forPredicate(
new RangePredicate(getExpression(operands.get(0)), false,
getStringValue(operands.get(1)), false,
- RangePredicate.UNBOUNDED));
+ RangePredicate.UNBOUNDED, new
LiteralContext(operands.get(1).getLiteral()).getType()));
case GREATER_THAN_OR_EQUAL:
return FilterContext.forPredicate(
new RangePredicate(getExpression(operands.get(0)), true,
getStringValue(operands.get(1)), false,
- RangePredicate.UNBOUNDED));
+ RangePredicate.UNBOUNDED, new
LiteralContext(operands.get(1).getLiteral()).getType()));
case LESS_THAN:
return FilterContext.forPredicate(
new RangePredicate(getExpression(operands.get(0)), false,
RangePredicate.UNBOUNDED, false,
- getStringValue(operands.get(1))));
+ getStringValue(operands.get(1)), new
LiteralContext(operands.get(1).getLiteral()).getType()));
case LESS_THAN_OR_EQUAL:
return FilterContext.forPredicate(
new RangePredicate(getExpression(operands.get(0)), false,
RangePredicate.UNBOUNDED, true,
- getStringValue(operands.get(1))));
+ getStringValue(operands.get(1)), new
LiteralContext(operands.get(1).getLiteral()).getType()));
case BETWEEN:
return FilterContext.forPredicate(
new RangePredicate(getExpression(operands.get(0)), true,
getStringValue(operands.get(1)), true,
- getStringValue(operands.get(2))));
+ getStringValue(operands.get(2)), new
LiteralContext(operands.get(1).getLiteral()).getType()));
case RANGE:
return FilterContext.forPredicate(
new RangePredicate(getExpression(operands.get(0)),
getStringValue(operands.get(1))));
@@ -400,22 +400,24 @@ public class RequestContextUtils {
}
case GREATER_THAN:
return FilterContext.forPredicate(
- new RangePredicate(operands.get(0), false,
getStringValue(operands.get(1)), false,
- RangePredicate.UNBOUNDED));
+ new RangePredicate(operands.get(0), false,
getStringValue(operands.get(1)), false, RangePredicate.UNBOUNDED,
+ operands.get(1).getLiteral().getType()));
case GREATER_THAN_OR_EQUAL:
return FilterContext.forPredicate(
- new RangePredicate(operands.get(0), true,
getStringValue(operands.get(1)), false,
- RangePredicate.UNBOUNDED));
+ new RangePredicate(operands.get(0), true,
getStringValue(operands.get(1)), false, RangePredicate.UNBOUNDED,
+ operands.get(1).getLiteral().getType()));
case LESS_THAN:
- return FilterContext.forPredicate(new RangePredicate(operands.get(0),
false, RangePredicate.UNBOUNDED, false,
- getStringValue(operands.get(1))));
+ return FilterContext.forPredicate(
+ new RangePredicate(operands.get(0), false,
RangePredicate.UNBOUNDED, false, getStringValue(operands.get(1)),
+ operands.get(1).getLiteral().getType()));
case LESS_THAN_OR_EQUAL:
- return FilterContext.forPredicate(new RangePredicate(operands.get(0),
false, RangePredicate.UNBOUNDED, true,
- getStringValue(operands.get(1))));
+ return FilterContext.forPredicate(
+ new RangePredicate(operands.get(0), false,
RangePredicate.UNBOUNDED, true, getStringValue(operands.get(1)),
+ operands.get(1).getLiteral().getType()));
case BETWEEN:
return FilterContext.forPredicate(
new RangePredicate(operands.get(0), true,
getStringValue(operands.get(1)), true,
- getStringValue(operands.get(2))));
+ getStringValue(operands.get(2)),
operands.get(1).getLiteral().getType()));
case RANGE:
return FilterContext.forPredicate(new RangePredicate(operands.get(0),
getStringValue(operands.get(1))));
case REGEXP_LIKE:
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
b/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
index e6b8fb59bc..61b5ebbe91 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RangePredicate.java
@@ -21,6 +21,7 @@ package org.apache.pinot.common.request.context.predicate;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.utils.CommonConstants.Query.Range;
@@ -43,6 +44,7 @@ public class RangePredicate extends BasePredicate {
private final String _lowerBound;
private final boolean _upperInclusive;
private final String _upperBound;
+ private final FieldSpec.DataType _rangeDataType;
/**
* The range is formatted as 5 parts:
@@ -67,15 +69,17 @@ public class RangePredicate extends BasePredicate {
int upperLength = upper.length();
_upperInclusive = upper.charAt(upperLength - 1) == UPPER_INCLUSIVE;
_upperBound = upper.substring(0, upperLength - 1);
+ _rangeDataType = FieldSpec.DataType.UNKNOWN;
}
public RangePredicate(ExpressionContext lhs, boolean lowerInclusive, String
lowerBound, boolean upperInclusive,
- String upperBound) {
+ String upperBound, FieldSpec.DataType rangeDataType) {
super(lhs);
_lowerInclusive = lowerInclusive;
_lowerBound = lowerBound;
_upperInclusive = upperInclusive;
_upperBound = upperBound;
+ _rangeDataType = rangeDataType;
}
@Override
@@ -99,6 +103,10 @@ public class RangePredicate extends BasePredicate {
return _upperBound;
}
+ public FieldSpec.DataType getRangeDataType() {
+ return _rangeDataType;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
index bb5e6e9100..e1c8a501ce 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
@@ -270,6 +270,6 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
if (upper == DICT_LEN - 1 && inclUpper) {
upperStr = "*";
}
- return new RangePredicate(COLUMN_EXPRESSION, inclLower, lowerStr,
inclUpper, upperStr);
+ return new RangePredicate(COLUMN_EXPRESSION, inclLower, lowerStr,
inclUpper, upperStr, DataType.STRING);
}
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
index 6b52d41546..9a1ee8a07e 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/predicate/PredicateTest.java
@@ -25,6 +25,7 @@ import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
import org.apache.pinot.common.request.context.predicate.Predicate;
import org.apache.pinot.common.request.context.predicate.RangePredicate;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.testng.annotations.Test;
@@ -74,7 +75,8 @@ public class PredicateTest {
// Non-standard RangePredicate (merged ranges)
RangePredicate rangePredicate =
- new RangePredicate(ExpressionContext.forIdentifier("foo"), true,
"123", false, "456");
+ new RangePredicate(ExpressionContext.forIdentifier("foo"), true,
"123", false, "456",
+ FieldSpec.DataType.STRING);
String predicateExpression = rangePredicate.toString();
assertEquals(predicateExpression, "(foo >= '123' AND foo < '456')");
Expression thriftExpression =
CalciteSqlParser.compileToExpression(predicateExpression);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
index efeb7ac071..f86c829171 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/query/request/context/utils/BrokerRequestToQueryContextConverterTest.java
@@ -295,14 +295,14 @@ public class BrokerRequestToQueryContextConverterTest {
List<FilterContext> children = filter.getChildren();
assertEquals(children.size(), 2);
assertEquals(children.get(0), FilterContext.forPredicate(
- new RangePredicate(ExpressionContext.forIdentifier("foo"), false,
"15", false, "*")));
+ new RangePredicate(ExpressionContext.forIdentifier("foo"), false,
"15", false, "*", FieldSpec.DataType.INT)));
FilterContext orFilter = children.get(1);
assertEquals(orFilter.getType(), FilterContext.Type.OR);
assertEquals(orFilter.getChildren().size(), 2);
assertEquals(orFilter.getChildren().get(0),
FilterContext.forPredicate(new RangePredicate(
ExpressionContext.forFunction(new
FunctionContext(FunctionContext.Type.TRANSFORM, "div",
Arrays.asList(ExpressionContext.forIdentifier("bar"),
ExpressionContext.forIdentifier("foo")))), true,
- "10", true, "20")));
+ "10", true, "20", FieldSpec.DataType.INT)));
assertEquals(orFilter.getChildren().get(1),
FilterContext.forPredicate(new
TextMatchPredicate(ExpressionContext.forIdentifier("foobar"), "potato")));
assertEquals(filter.toString(),
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
index 7e632a19b2..78925a55e2 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java
@@ -26,7 +26,9 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.regex.Pattern;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -35,10 +37,12 @@ import
org.apache.pinot.common.request.context.predicate.InPredicate;
import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
import org.apache.pinot.common.request.context.predicate.NotInPredicate;
import org.apache.pinot.common.request.context.predicate.Predicate;
-import
org.apache.pinot.segment.local.segment.creator.impl.inv.json.BaseJsonIndexCreator;
+import org.apache.pinot.common.request.context.predicate.RangePredicate;
+import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
import org.apache.pinot.segment.spi.index.creator.JsonIndexCreator;
import org.apache.pinot.segment.spi.index.mutable.MutableJsonIndex;
import org.apache.pinot.spi.config.table.JsonIndexConfig;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.exception.BadQueryRequestException;
import org.apache.pinot.spi.utils.JsonUtils;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
@@ -53,7 +57,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
*/
public class MutableJsonIndexImpl implements MutableJsonIndex {
private final JsonIndexConfig _jsonIndexConfig;
- private final Map<String, RoaringBitmap> _postingListMap;
+ private final TreeMap<String, RoaringBitmap> _postingListMap;
private final IntList _docIdMapping;
private final ReentrantReadWriteLock.ReadLock _readLock;
private final ReentrantReadWriteLock.WriteLock _writeLock;
@@ -63,7 +67,7 @@ public class MutableJsonIndexImpl implements MutableJsonIndex
{
public MutableJsonIndexImpl(JsonIndexConfig jsonIndexConfig) {
_jsonIndexConfig = jsonIndexConfig;
- _postingListMap = new HashMap<>();
+ _postingListMap = new TreeMap<>();
_docIdMapping = new IntArrayList();
ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock();
@@ -230,7 +234,7 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
// "[0]"=1 -> ".$index"='0' && "."='1'
// ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
- String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY +
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+ String searchKey = leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
RoaringBitmap docIds = _postingListMap.get(searchKey);
if (docIds != null) {
if (matchingDocIds == null) {
@@ -250,7 +254,7 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
if (predicateType == Predicate.Type.EQ || predicateType ==
Predicate.Type.NOT_EQ) {
String value = predicateType == Predicate.Type.EQ ? ((EqPredicate)
predicate).getValue()
: ((NotEqPredicate) predicate).getValue();
- String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR +
value;
+ String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR + value;
RoaringBitmap matchingDocIdsForKeyValuePair =
_postingListMap.get(keyValuePair);
if (matchingDocIdsForKeyValuePair != null) {
if (matchingDocIds == null) {
@@ -267,7 +271,7 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
: ((NotInPredicate) predicate).getValues();
RoaringBitmap matchingDocIdsForKeyValuePairs = new RoaringBitmap();
for (String value : values) {
- String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR +
value;
+ String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR +
value;
RoaringBitmap matchingDocIdsForKeyValuePair =
_postingListMap.get(keyValuePair);
if (matchingDocIdsForKeyValuePair != null) {
matchingDocIdsForKeyValuePairs.or(matchingDocIdsForKeyValuePair);
@@ -291,6 +295,85 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
} else {
return new RoaringBitmap();
}
+ } else if (predicateType == Predicate.Type.REGEXP_LIKE) {
+ Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
+ if (subMap.isEmpty()) {
+ return new RoaringBitmap();
+ }
+ Pattern pattern = ((RegexpLikePredicate) predicate).getPattern();
+ RoaringBitmap result = null;
+
+ for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
+ if (!pattern.matcher(entry.getKey().substring(key.length() +
1)).matches()) {
+ continue;
+ }
+ if (result == null) {
+ result = entry.getValue().clone();
+ } else {
+ result.or(entry.getValue());
+ }
+ }
+
+ if (result == null) {
+ return new RoaringBitmap();
+ } else {
+ if (matchingDocIds == null) {
+ return result;
+ } else {
+ matchingDocIds.and(result);
+ return matchingDocIds;
+ }
+ }
+ } else if (predicateType == Predicate.Type.RANGE) {
+ Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
+ if (subMap.isEmpty()) {
+ return new RoaringBitmap();
+ }
+ RoaringBitmap result = null;
+
+ RangePredicate rangePredicate = (RangePredicate) predicate;
+ FieldSpec.DataType rangeDataType = rangePredicate.getRangeDataType();
+ // Simplify to only support numeric and string types
+ if (rangeDataType.isNumeric()) {
+ rangeDataType = FieldSpec.DataType.DOUBLE;
+ } else {
+ rangeDataType = FieldSpec.DataType.STRING;
+ }
+
+ boolean lowerUnbounded =
rangePredicate.getLowerBound().equals(RangePredicate.UNBOUNDED);
+ boolean upperUnbounded =
rangePredicate.getUpperBound().equals(RangePredicate.UNBOUNDED);
+ boolean lowerInclusive = lowerUnbounded ||
rangePredicate.isLowerInclusive();
+ boolean upperInclusive = upperUnbounded ||
rangePredicate.isUpperInclusive();
+ Object lowerBound = lowerUnbounded ? null :
rangeDataType.convert(rangePredicate.getLowerBound());
+ Object upperBound = upperUnbounded ? null :
rangeDataType.convert(rangePredicate.getUpperBound());
+
+ for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
+ Object valueObj =
rangeDataType.convert(entry.getKey().substring(key.length() + 1));
+ boolean lowerCompareResult =
+ lowerUnbounded || (lowerInclusive ?
rangeDataType.compare(valueObj, lowerBound) >= 0
+ : rangeDataType.compare(valueObj, lowerBound) > 0);
+ boolean upperCompareResult =
+ upperUnbounded || (upperInclusive ?
rangeDataType.compare(valueObj, upperBound) <= 0
+ : rangeDataType.compare(valueObj, upperBound) < 0);
+ if (lowerCompareResult && upperCompareResult) {
+ if (result == null) {
+ result = entry.getValue().clone();
+ } else {
+ result.or(entry.getValue());
+ }
+ }
+ }
+
+ if (result == null) {
+ return new RoaringBitmap();
+ } else {
+ if (matchingDocIds == null) {
+ return result;
+ } else {
+ matchingDocIds.and(result);
+ return matchingDocIds;
+ }
+ }
} else {
throw new IllegalStateException("Unsupported json_match predicate type:
" + predicate);
}
@@ -301,10 +384,8 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
_readLock.lock();
try {
- for (Map.Entry<String, RoaringBitmap> entry :
_postingListMap.entrySet()) {
- if (!entry.getKey().startsWith(key +
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR)) {
- continue;
- }
+ Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
+ for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
MutableRoaringBitmap flattenedDocIds =
entry.getValue().toMutableRoaringBitmap();
PeekableIntIterator it = flattenedDocIds.getIntIterator();
MutableRoaringBitmap postingList = new MutableRoaringBitmap();
@@ -342,6 +423,11 @@ public class MutableJsonIndexImpl implements
MutableJsonIndex {
return values;
}
+ private Map<String, RoaringBitmap> getMatchingKeysMap(String key) {
+ return _postingListMap.subMap(key + JsonIndexCreator.KEY_VALUE_SEPARATOR,
false,
+ key + JsonIndexCreator.KEY_VALUE_SEPARATOR_NEXT_CHAR, false);
+ }
+
@Override
public void close() {
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
index ac4a24d56d..ee3dc5bcad 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java
@@ -24,6 +24,7 @@ import java.nio.ByteOrder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
@@ -32,11 +33,15 @@ import
org.apache.pinot.common.request.context.predicate.InPredicate;
import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
import org.apache.pinot.common.request.context.predicate.NotInPredicate;
import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.common.request.context.predicate.RangePredicate;
+import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
import
org.apache.pinot.segment.local.segment.creator.impl.inv.json.BaseJsonIndexCreator;
import
org.apache.pinot.segment.local.segment.index.readers.BitmapInvertedIndexReader;
import org.apache.pinot.segment.local.segment.index.readers.StringDictionary;
+import org.apache.pinot.segment.spi.index.creator.JsonIndexCreator;
import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.exception.BadQueryRequestException;
import org.apache.pinot.spi.utils.JsonUtils;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
@@ -192,7 +197,7 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
// "[0]"=1 -> ".$index"='0' && "."='1'
// ".foo[1].bar"='abc' -> ".foo.$index"=1 && ".foo..bar"='abc'
String searchKey =
- leftPart + JsonUtils.ARRAY_INDEX_KEY +
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+ leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
int dictId = _dictionary.indexOf(searchKey);
if (dictId >= 0) {
ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
@@ -232,7 +237,7 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
if (!arrayIndex.equals(JsonUtils.WILDCARD)) {
// "foo[1].bar"='abc' -> "foo.$index"=1 && "foo.bar"='abc'
String searchKey =
- leftPart + JsonUtils.ARRAY_INDEX_KEY +
BaseJsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
+ leftPart + JsonUtils.ARRAY_INDEX_KEY +
JsonIndexCreator.KEY_VALUE_SEPARATOR + arrayIndex;
int dictId = _dictionary.indexOf(searchKey);
if (dictId >= 0) {
ImmutableRoaringBitmap docIds = _invertedIndex.getDocIds(dictId);
@@ -254,7 +259,7 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
if (predicateType == Predicate.Type.EQ || predicateType ==
Predicate.Type.NOT_EQ) {
String value = predicateType == Predicate.Type.EQ ? ((EqPredicate)
predicate).getValue()
: ((NotEqPredicate) predicate).getValue();
- String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR +
value;
+ String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR + value;
int dictId = _dictionary.indexOf(keyValuePair);
if (dictId >= 0) {
ImmutableRoaringBitmap matchingDocIdsForKeyValuePair =
_invertedIndex.getDocIds(dictId);
@@ -272,7 +277,7 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
: ((NotInPredicate) predicate).getValues();
MutableRoaringBitmap matchingDocIdsForKeyValuePairs = new
MutableRoaringBitmap();
for (String value : values) {
- String keyValuePair = key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR +
value;
+ String keyValuePair = key + JsonIndexCreator.KEY_VALUE_SEPARATOR +
value;
int dictId = _dictionary.indexOf(keyValuePair);
if (dictId >= 0) {
matchingDocIdsForKeyValuePairs.or(_invertedIndex.getDocIds(dictId));
@@ -297,6 +302,79 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
} else {
return new MutableRoaringBitmap();
}
+ } else if (predicateType == Predicate.Type.REGEXP_LIKE) {
+ Pattern pattern = ((RegexpLikePredicate) predicate).getPattern();
+ int[] dictIds = getDictIdRangeForKey(key);
+
+ MutableRoaringBitmap result = null;
+ for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
+ String value =
_dictionary.getStringValue(dictId).substring(key.length() + 1);
+ if (pattern.matcher(value).matches()) {
+ if (result == null) {
+ result = _invertedIndex.getDocIds(dictId).toMutableRoaringBitmap();
+ } else {
+ result.or(_invertedIndex.getDocIds(dictId));
+ }
+ }
+ }
+ if (result == null) {
+ return new MutableRoaringBitmap();
+ } else {
+ if (matchingDocIds == null) {
+ return result;
+ } else {
+ matchingDocIds.and(result);
+ return matchingDocIds;
+ }
+ }
+ } else if (predicateType == Predicate.Type.RANGE) {
+ RangePredicate rangePredicate = (RangePredicate) predicate;
+ FieldSpec.DataType rangeDataType = rangePredicate.getRangeDataType();
+ // Simplify to only support numeric and string types
+ if (rangeDataType.isNumeric()) {
+ rangeDataType = FieldSpec.DataType.DOUBLE;
+ } else {
+ rangeDataType = FieldSpec.DataType.STRING;
+ }
+
+ boolean lowerUnbounded =
rangePredicate.getLowerBound().equals(RangePredicate.UNBOUNDED);
+ boolean upperUnbounded =
rangePredicate.getUpperBound().equals(RangePredicate.UNBOUNDED);
+ boolean lowerInclusive = lowerUnbounded ||
rangePredicate.isLowerInclusive();
+ boolean upperInclusive = upperUnbounded ||
rangePredicate.isUpperInclusive();
+ Object lowerBound = lowerUnbounded ? null :
rangeDataType.convert(rangePredicate.getLowerBound());
+ Object upperBound = upperUnbounded ? null :
rangeDataType.convert(rangePredicate.getUpperBound());
+
+ int[] dictIds = getDictIdRangeForKey(key);
+ MutableRoaringBitmap result = null;
+ for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
+ String value =
_dictionary.getStringValue(dictId).substring(key.length() + 1);
+ Object valueObj = rangeDataType.convert(value);
+ boolean lowerCompareResult =
+ lowerUnbounded || (lowerInclusive ?
rangeDataType.compare(valueObj, lowerBound) >= 0
+ : rangeDataType.compare(valueObj, lowerBound) > 0);
+ boolean upperCompareResult =
+ upperUnbounded || (upperInclusive ?
rangeDataType.compare(valueObj, upperBound) <= 0
+ : rangeDataType.compare(valueObj, upperBound) < 0);
+
+ if (lowerCompareResult && upperCompareResult) {
+ if (result == null) {
+ result = _invertedIndex.getDocIds(dictId).toMutableRoaringBitmap();
+ } else {
+ result.or(_invertedIndex.getDocIds(dictId));
+ }
+ }
+ }
+
+ if (result == null) {
+ return new MutableRoaringBitmap();
+ } else {
+ if (matchingDocIds == null) {
+ return result;
+ } else {
+ matchingDocIds.and(result);
+ return matchingDocIds;
+ }
+ }
} else {
throw new IllegalStateException("Unsupported json_match predicate type:
" + predicate);
}
@@ -359,7 +437,7 @@ public class ImmutableJsonIndexReader implements
JsonIndexReader {
if (indexOfMin == -1) {
return new int[]{-1, -1}; // if key does not exist, immediately return
}
- int indexOfMax = _dictionary.insertionIndexOf(key + "\u0001");
+ int indexOfMax = _dictionary.insertionIndexOf(key +
JsonIndexCreator.KEY_VALUE_SEPARATOR_NEXT_CHAR);
int minDictId = indexOfMin + 1; // skip the index of the key only
int maxDictId = -1 * indexOfMax - 1; // undo the binary search
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
index 461f8eb93e..63aa83b0e9 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java
@@ -78,16 +78,16 @@ public class JsonIndexTest {
// @formatter: off
// CHECKSTYLE:OFF
String[] records = new String[]{
- "{" + "\"name\":\"adam\"," + "\"age\":20," + "\"addresses\":["
+ "{" + "\"name\":\"adam\"," + "\"age\":20," + "\"score\":1.25," +
"\"addresses\":["
+ " {\"street\":\"street-00\",\"country\":\"us\"}," + "
{\"street\":\"street-01\",\"country\":\"us\"},"
+ " {\"street\":\"street-02\",\"country\":\"ca\"}]," +
"\"skills\":[\"english\",\"programming\"]" + "}",
- "{" + "\"name\":\"bob\"," + "\"age\":25," + "\"addresses\":["
+ "{" + "\"name\":\"bob\"," + "\"age\":25," + "\"score\":1.94," +
"\"addresses\":["
+ " {\"street\":\"street-10\",\"country\":\"ca\"}," + "
{\"street\":\"street-11\",\"country\":\"us\"},"
+ " {\"street\":\"street-12\",\"country\":\"in\"}]," +
"\"skills\":[]" + "}",
- "{" + "\"name\":\"charles\"," + "\"age\":30," + "\"addresses\":["
+ "{" + "\"name\":\"charles\"," + "\"age\":30," + "\"score\":0.90," +
"\"addresses\":["
+ " {\"street\":\"street-20\",\"country\":\"jp\"}," + "
{\"street\":\"street-21\",\"country\":\"kr\"},"
+ " {\"street\":\"street-22\",\"country\":\"cn\"}]," +
"\"skills\":[\"japanese\",\"korean\",\"chinese\"]"
- + "}", "{" + "\"name\":\"david\"," + "\"age\":35," +
"\"addresses\":["
+ + "}", "{" + "\"name\":\"david\"," + "\"age\":35," +
"\"score\":0.9999," + "\"addresses\":["
+ "
{\"street\":\"street-30\",\"country\":\"ca\",\"types\":[\"home\",\"office\"]},"
+ " {\"street\":\"street-31\",\"country\":\"ca\"}," + "
{\"street\":\"street-32\",\"country\":\"ca\"}],"
+ "\"skills\":null" + "}"
@@ -120,6 +120,39 @@ public class JsonIndexTest {
matchingDocIds = getMatchingDocIds(indexReader,
"\"addresses[*].street\" = 'street-21'");
Assert.assertEquals(matchingDocIds.toArray(), new int[]{2});
+ matchingDocIds = getMatchingDocIds(indexReader,
"REGEXP_LIKE(\"addresses[*].street\", 'street-2.*')");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{2});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"age\" > 25");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{2, 3});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"age\" >= 25");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{1, 2, 3});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"age\" < 25");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{0});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"age\" <= 25");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"name\" > 'adam'");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{1, 2, 3});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"name\" > 'a'");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1, 2, 3});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"score\" > 1");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"score\" > 1.0");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1});
+
+ matchingDocIds = getMatchingDocIds(indexReader, "\"score\" > 0.99");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 1, 3});
+
+ matchingDocIds = getMatchingDocIds(indexReader,
"REGEXP_LIKE(\"score\", '[0-1]\\.[6-9].*')");
+ Assert.assertEquals(matchingDocIds.toArray(), new int[]{1, 2, 3});
+
matchingDocIds = getMatchingDocIds(indexReader,
"\"addresses[*].street\" NOT IN ('street-10', 'street-22')");
Assert.assertEquals(matchingDocIds.toArray(), new int[]{0, 3});
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
index 64e42c7f35..a886111756 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/JsonIndexCreator.java
@@ -29,6 +29,7 @@ import org.apache.pinot.segment.spi.index.IndexCreator;
*/
public interface JsonIndexCreator extends IndexCreator {
char KEY_VALUE_SEPARATOR = '\0';
+ char KEY_VALUE_SEPARATOR_NEXT_CHAR = KEY_VALUE_SEPARATOR + 1;
@Override
default void add(@Nonnull Object value, int dictId)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index e1218452d3..d5f3f7e403 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -28,6 +28,7 @@ import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.pinot.spi.utils.BooleanUtils;
+import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.spi.utils.BytesUtils;
import org.apache.pinot.spi.utils.EqualityUtils;
import org.apache.pinot.spi.utils.JsonUtils;
@@ -564,6 +565,39 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
}
}
+ /**
+ * Compares the given values of the data type.
+ *
+ * return 0 if the values are equal
+ * return -1 if value1 is less than value2
+ * return 1 if value1 is greater than value2
+ */
+ public int compare(Object value1, Object value2) {
+ switch (this) {
+ case INT:
+ return Integer.compare((int) value1, (int) value2);
+ case LONG:
+ return Long.compare((long) value1, (long) value2);
+ case FLOAT:
+ return Float.compare((float) value1, (float) value2);
+ case DOUBLE:
+ return Double.compare((double) value1, (double) value2);
+ case BIG_DECIMAL:
+ return ((BigDecimal) value1).compareTo((BigDecimal) value2);
+ case BOOLEAN:
+ return Boolean.compare((boolean) value1, (boolean) value2);
+ case TIMESTAMP:
+ return Long.compare((long) value1, (long) value2);
+ case STRING:
+ case JSON:
+ return ((String) value1).compareTo((String) value2);
+ case BYTES:
+ return ByteArray.compare((byte[]) value1, (byte[]) value2);
+ default:
+ throw new IllegalStateException();
+ }
+ }
+
/**
* Converts the given value of the data type to string.The input value for
BYTES type should be byte[].
*/
@@ -635,8 +669,8 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
public boolean isBackwardCompatibleWith(FieldSpec oldFieldSpec) {
return EqualityUtils.isEqual(_name, oldFieldSpec._name)
- && EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType)
- && EqualityUtils.isEqual(_isSingleValueField,
oldFieldSpec._isSingleValueField);
+ && EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType)
+ && EqualityUtils.isEqual(_isSingleValueField,
oldFieldSpec._isSingleValueField);
}
public static class FieldSpecMetadata {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org