This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 61fc9190bc [multistage] Support IN and NOT-IN Clauses (#9374)
61fc9190bc is described below
commit 61fc9190bc1c2f15b3775acf892689a30da6df5e
Author: Ankit Sultana <[email protected]>
AuthorDate: Thu Sep 15 07:26:28 2022 +0530
[multistage] Support IN and NOT-IN Clauses (#9374)
* [multistage] Support IN Clause With 1 Argument
* Working in/not-in
* Refactor RexExpressionUtils after rebasing with master
---
.../pinot/query/planner/logical/RexExpression.java | 26 +++----
.../query/planner/logical/RexExpressionUtils.java | 84 ++++++++++++++++++++++
.../apache/pinot/query/QueryCompilationTest.java | 4 ++
.../pinot/query/QueryEnvironmentTestBase.java | 2 +
.../query/runtime/operator/AggregateOperator.java | 4 +-
.../pinot/query/runtime/QueryRunnerTest.java | 17 +++++
6 files changed, 117 insertions(+), 20 deletions(-)
diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
index 32a3608c86..a8f51036ca 100644
--- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
+++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpression.java
@@ -18,7 +18,6 @@
*/
package org.apache.pinot.query.planner.logical;
-import com.google.common.base.Preconditions;
import java.math.BigDecimal;
import java.util.List;
import java.util.stream.Collectors;
@@ -29,7 +28,6 @@ import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
-import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.NlsString;
import org.apache.pinot.common.utils.PinotDataType;
import org.apache.pinot.query.planner.serde.ProtoProperties;
@@ -52,30 +50,24 @@ public interface RexExpression {
} else if (rexNode instanceof RexLiteral) {
RexLiteral rexLiteral = ((RexLiteral) rexNode);
FieldSpec.DataType dataType = toDataType(rexLiteral.getType());
- return new RexExpression.Literal(dataType, rexLiteral.getTypeName(),
- toRexValue(dataType, rexLiteral.getValue()));
+ return new RexExpression.Literal(dataType, toRexValue(dataType,
rexLiteral.getValue()));
} else if (rexNode instanceof RexCall) {
RexCall rexCall = (RexCall) rexNode;
- List<RexExpression> operands =
rexCall.getOperands().stream().map(RexExpression::toRexExpression)
- .collect(Collectors.toList());
- return toRexExpression(rexCall, operands);
+ return toRexExpression(rexCall);
} else {
throw new IllegalArgumentException("Unsupported RexNode type with
SqlKind: " + rexNode.getKind());
}
}
- static RexExpression toRexExpression(RexCall rexCall, List<RexExpression>
operands) {
+ static RexExpression toRexExpression(RexCall rexCall) {
switch (rexCall.getKind()) {
case CAST:
- // CAST is being rewritten into "rexCall.CAST<targetType>(inputValue)",
- // - e.g. result type has already been converted into the CAST
RexCall, so we assert single operand.
- Preconditions.checkState(operands.size() == 1, "CAST takes exactly 2
arguments");
- RelDataType castType = rexCall.getType();
- // add the 2nd argument as the source type info.
- operands.add(new Literal(FieldSpec.DataType.STRING,
rexCall.getOperands().get(0).getType().getSqlTypeName(),
- toPinotDataType(rexCall.getOperands().get(0).getType()).name()));
- return new RexExpression.FunctionCall(rexCall.getKind(),
toDataType(rexCall.getType()), "CAST", operands);
+ return RexExpressionUtils.handleCast(rexCall);
+ case SEARCH:
+ return RexExpressionUtils.handleSearch(rexCall);
default:
+ List<RexExpression> operands =
+
rexCall.getOperands().stream().map(RexExpression::toRexExpression).collect(Collectors.toList());
return new RexExpression.FunctionCall(rexCall.getKind(),
toDataType(rexCall.getType()),
rexCall.getOperator().getName(), operands);
}
@@ -186,7 +178,7 @@ public interface RexExpression {
public Literal() {
}
- public Literal(FieldSpec.DataType dataType, SqlTypeName sqlTypeName,
@Nullable Object value) {
+ public Literal(FieldSpec.DataType dataType, @Nullable Object value) {
_sqlKind = SqlKind.LITERAL;
_dataType = dataType;
_value = value;
diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
new file mode 100644
index 0000000000..92d246b904
--- /dev/null
+++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RexExpressionUtils.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.planner.logical;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Range;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.Sarg;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.pinot.spi.data.FieldSpec;
+
+
+public class RexExpressionUtils {
+
+ private RexExpressionUtils() {
+ }
+
+ static RexExpression handleCast(RexCall rexCall) {
+ // CAST is being rewritten into "rexCall.CAST<targetType>(inputValue)",
+ // - e.g. result type has already been converted into the CAST RexCall,
so we assert single operand.
+ List<RexExpression> operands =
+
rexCall.getOperands().stream().map(RexExpression::toRexExpression).collect(Collectors.toList());
+ Preconditions.checkState(operands.size() == 1, "CAST takes exactly 2
arguments");
+ RelDataType castType = rexCall.getType();
+ // add the 2nd argument as the source type info.
+ operands.add(new RexExpression.Literal(FieldSpec.DataType.STRING,
+
RexExpression.toPinotDataType(rexCall.getOperands().get(0).getType()).name()));
+ return new RexExpression.FunctionCall(rexCall.getKind(),
RexExpression.toDataType(rexCall.getType()), "CAST",
+ operands);
+ }
+
+ // TODO: Add support for range filter expressions (e.g. a > 0 and a < 30)
+ static RexExpression handleSearch(RexCall rexCall) {
+ List<RexNode> operands = rexCall.getOperands();
+ RexInputRef rexInputRef = (RexInputRef) operands.get(0);
+ RexLiteral rexLiteral = (RexLiteral) operands.get(1);
+ FieldSpec.DataType dataType =
RexExpression.toDataType(rexLiteral.getType());
+ Sarg sarg = rexLiteral.getValueAs(Sarg.class);
+ if (sarg.isPoints()) {
+ return new RexExpression.FunctionCall(SqlKind.IN, dataType,
SqlKind.IN.name(), toFunctionOperands(rexInputRef,
+ sarg.rangeSet.asRanges(), dataType));
+ } else if (sarg.isComplementedPoints()) {
+ return new RexExpression.FunctionCall(SqlKind.NOT_IN, dataType,
SqlKind.NOT_IN.name(),
+ toFunctionOperands(rexInputRef,
sarg.rangeSet.complement().asRanges(), dataType));
+ } else {
+ throw new NotImplementedException("Range is not implemented yet");
+ }
+ }
+
+ private static List<RexExpression> toFunctionOperands(RexInputRef
rexInputRef, Set<Range> ranges,
+ FieldSpec.DataType dataType) {
+ List<RexExpression> result = new ArrayList<>(ranges.size() + 1);
+ result.add(RexExpression.toRexExpression(rexInputRef));
+ for (Range range : ranges) {
+ result.add(new RexExpression.Literal(dataType,
RexExpression.toRexValue(dataType, range.lowerEndpoint())));
+ }
+ return result;
+ }
+}
diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
index bace47a3ed..4223a1717a 100644
--- a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
+++ b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
@@ -255,6 +255,10 @@ public class QueryCompilationTest extends QueryEnvironmentTestBase {
new Object[]{"SELECT b.col1 - a.col3 FROM a JOIN c ON a.col1 =
c.col3", "Table 'b' not found"},
// non-agg column not being grouped
new Object[]{"SELECT a.col1, SUM(a.col3) FROM a", "'a.col1' is not
being grouped"},
+ // empty IN clause fails compilation
+ new Object[]{"SELECT a.col1 FROM a WHERE a.col1 IN ()", "Encountered
\"\" at line"},
+ // range filter queries are not supported right now
+ new Object[]{"SELECT a.col1 FROM a WHERE a.col1 > 'x' AND a.col1 <
'y'", "Range is not implemented yet"}
};
}
diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
index 3d633e932f..799d0ef380 100644
--- a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
+++ b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryEnvironmentTestBase.java
@@ -67,6 +67,8 @@ public class QueryEnvironmentTestBase {
new Object[]{"SELECT dateTrunc('DAY', a.ts + b.ts) FROM a JOIN b on
a.col1 = b.col1 AND a.col2 = b.col2"},
new Object[]{"SELECT a.col2, a.col3 FROM a JOIN b ON a.col1 = b.col1 "
+ " WHERE a.col3 >= 0 GROUP BY a.col2, a.col3"},
+ new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1
WHERE a.col2 IN ('foo', 'bar') AND"
+ + " b.col2 NOT IN ('alice', 'charlie')"},
};
}
}
diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
index 67c5edfa99..cd184fe89f 100644
--- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
+++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java
@@ -25,7 +25,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
-import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.core.common.Operator;
@@ -98,8 +97,7 @@ public class AggregateOperator extends BaseOperator<TransferableBlock> {
private RexExpression toAggregationFunctionOperand(RexExpression
rexExpression) {
List<RexExpression> functionOperands = ((RexExpression.FunctionCall)
rexExpression).getFunctionOperands();
Preconditions.checkState(functionOperands.size() < 2);
- return functionOperands.size() > 0 ? functionOperands.get(0)
- : new RexExpression.Literal(FieldSpec.DataType.INT,
SqlTypeName.INTEGER, 1);
+ return functionOperands.size() > 0 ? functionOperands.get(0) : new
RexExpression.Literal(FieldSpec.DataType.INT, 1);
}
@Override
diff --git a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
index 7ae65076b8..71656f7817 100644
--- a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
+++ b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java
@@ -109,6 +109,23 @@ public class QueryRunnerTest extends QueryRunnerTestBase {
new Object[]{"SELECT a.col1, a.ts, b.col2, b.col3 FROM a JOIN b ON
a.col1 = b.col2 "
+ " WHERE a.col3 >= 0 AND a.col2 = 'alice' AND b.col3 >= 0", 3},
+ // Join query with IN and Not-IN clause. Table A's side of join will
return 9 rows and Table B's side will
+ // return 2 rows. Join will be only on col1=bar and since A will
return 3 rows with that value and B will return
+ // 1 row, the final output will have 3 rows.
+ new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 "
+ + " WHERE a.col1 IN ('foo', 'bar', 'alice') AND b.col2 NOT IN
('foo', 'alice')", 3},
+
+ // Same query as above but written using OR/AND instead of IN.
+ new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 "
+ + " WHERE (a.col1 = 'foo' OR a.col1 = 'bar' OR a.col1 = 'alice')
AND b.col2 != 'foo'"
+ + " AND b.col2 != 'alice'", 3},
+
+ // Same as above but with single argument IN clauses. Left side of the
join returns 3 rows, and the right side
+ // returns 5 rows. Only key where join succeeds is col1=foo, and since
table B has only 1 row with that value,
+ // the number of rows should be 3.
+ new Object[]{"SELECT a.col1, b.col2 FROM a JOIN b ON a.col1 = b.col1 "
+ + " WHERE a.col1 IN ('foo') AND b.col2 NOT IN ('')", 3},
+
// Projection pushdown
new Object[]{"SELECT a.col1, a.col3 + a.col3 FROM a WHERE a.col3 >= 0
AND a.col2 = 'alice'", 3},
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]