>From Ritik Raj <[email protected]>:
Ritik Raj has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19288 )
Change subject: [ASTERIXDB-3540][COMP] Fixed calculation of expected schema for
pushdown
......................................................................
[ASTERIXDB-3540][COMP] Fixed calculation of expected schema for pushdown
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
If the getField expression contained a function that needs to be
evaluated at runtime, the pushdown computer did not evaluate that
expression, leading to an incorrect computation of the expected schema.
e.g.:
1. `field-access-by-name`(t.r.p, x.y.age_field)
2. `field-access-by-name`(t.r.p, substring(x.y.age_field, 0, 4))
Ext-ref: MB-64730
Change-Id: Iac55527af143c292557158ca8e47e92538e93970
---
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
M
asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
M
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
A
asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
A
asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
A asterixdb/asterix-app/data/hdfs/parquet/friends.json
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
9 files changed, 189 insertions(+), 6 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/88/19288/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
index e442d64..299af59 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaBuilder.java
@@ -56,6 +56,11 @@
public boolean setSchemaFromExpression(AbstractFunctionCallExpression
expr, LogicalVariable producedVar,
IVariableTypeEnvironment typeEnv) throws AlgebricksException {
+ return buildExpectedSchemaNodes(expr, typeEnv, producedVar);
+ }
+
+ private boolean
setSchemaFromCalculatedExpression(AbstractFunctionCallExpression expr,
LogicalVariable producedVar,
+ IVariableTypeEnvironment typeEnv) throws AlgebricksException {
//Parent always nested
AbstractComplexExpectedSchemaNode parent =
(AbstractComplexExpectedSchemaNode) buildNestedNode(expr, typeEnv);
if (parent != null) {
@@ -66,7 +71,6 @@
varToNode.put(producedVar, actualNode);
}
}
-
return parent != null;
}
@@ -104,6 +108,66 @@
return varToNode.get(variable);
}
+ private boolean buildExpectedSchemaNodes(ILogicalExpression expr,
IVariableTypeEnvironment typeEnv,
+ LogicalVariable producedVar) throws AlgebricksException {
+ return buildNestedNodes(expr, typeEnv, producedVar);
+ }
+
+ private boolean buildNestedNodes(ILogicalExpression expr,
IVariableTypeEnvironment typeEnv,
+ LogicalVariable producedVar) throws AlgebricksException {
+ //The current node expression
+ boolean changed = false;
+ if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+ return false;
+ }
+ AbstractFunctionCallExpression myExpr =
(AbstractFunctionCallExpression) expr;
+ if (!SUPPORTED_FUNCTIONS.contains(myExpr.getFunctionIdentifier()) ||
noArgsOrFirstArgIsConstant(myExpr)) {
+ // Check if the function consists of the Supported Functions
+ for (Mutable<ILogicalExpression> arg : myExpr.getArguments()) {
+ changed |= buildNestedNodes(arg.getValue(), typeEnv,
producedVar);
+ }
+ return changed;
+ }
+ // if the child is not a function expression, then just one node.
+ if (BuiltinFunctions.ARRAY_STAR.equals(myExpr.getFunctionIdentifier())
+ ||
BuiltinFunctions.SCAN_COLLECTION.equals(myExpr.getFunctionIdentifier())) {
+ // these supported function won't have second child
+ IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr,
typeEnv);
+ if (expectedSchemaNode != null) {
+ changed |=
+
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr,
producedVar, typeEnv);
+ }
+ } else {
+ ILogicalExpression childExpr =
myExpr.getArguments().get(1).getValue();
+ if (childExpr.getExpressionTag() !=
LogicalExpressionTag.FUNCTION_CALL) {
+ // must be a variable or constant
+ IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr,
typeEnv);
+ if (expectedSchemaNode != null) {
+ changed |=
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr,
producedVar,
+ typeEnv);
+ }
+ } else {
+ // as the childExpr is a function.
+ // if the function had been evaluated at compile time, it
would have been
+ // evaluated at this stage of compilation.
+ // eg: field-access(t.r.p, substring("name",2,4))
+ // this will be evaluated to field-access(t.r.p, "me") at
compile time itself.
+ // since the execution reached this branch, this means the
childExpr
+ // need to be evaluated at runtime, hence the childExpr should
also be checked
+ // for possible pushdown.
+ // eg: field-access(t.r.p, substring(x.y.age_field, 0, 4))
+ ILogicalExpression parentExpr =
myExpr.getArguments().get(0).getValue();
+ IExpectedSchemaNode parentExpectedNode =
buildNestedNode(parentExpr, typeEnv);
+ if (parentExpectedNode != null) {
+ changed |=
setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) parentExpr,
+ producedVar, typeEnv);
+ }
+ changed |= buildNestedNodes(childExpr, typeEnv, producedVar);
+ }
+ }
+ return changed;
+ }
+
private IExpectedSchemaNode buildNestedNode(ILogicalExpression expr,
IVariableTypeEnvironment typeEnv)
throws AlgebricksException {
//The current node expression
@@ -112,7 +176,6 @@
//Return null if the function is not supported.
return null;
}
-
//The parent expression
ILogicalExpression parentExpr =
myExpr.getArguments().get(0).getValue();
if (isVariable(parentExpr)) {
@@ -120,7 +183,6 @@
LogicalVariable sourceVar =
VariableUtilities.getVariable(parentExpr);
return changeNodeForVariable(sourceVar, myExpr, myExpr);
}
-
//Recursively create the parent nodes. Parent is always a nested node
AbstractComplexExpectedSchemaNode newParent =
(AbstractComplexExpectedSchemaNode)
buildNestedNode(parentExpr, typeEnv);
@@ -177,7 +239,6 @@
return handleUnion(parentExpr, parent, child);
default:
throw new IllegalStateException("Node " + parent.getType() + "
is not nested");
-
}
}
@@ -204,7 +265,6 @@
} else {
actualChild = objectNode.replaceChild(actualChild, child);
}
-
return actualChild;
}
@@ -218,7 +278,6 @@
} else {
actualChild = arrayNode.replaceChild(actualChild, child);
}
-
return actualChild;
}
diff --git a/asterixdb/asterix-app/data/hdfs/parquet/friends.json
b/asterixdb/asterix-app/data/hdfs/parquet/friends.json
new file mode 100644
index 0000000..d708ad9
--- /dev/null
+++ b/asterixdb/asterix-app/data/hdfs/parquet/friends.json
@@ -0,0 +1 @@
+{ "id": "1", "name": "Monica", "x": { "y": { "age_field": "age" } }, "t": {
"r": { "p": { "age": "26" } } } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index c50b391..9e63c44 100644
---
a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++
b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -395,6 +395,7 @@
loadData(generatedDataBasePath, "", "heterogeneous_1.parquet",
definition, definitionSegment, false, false);
loadData(generatedDataBasePath, "", "heterogeneous_2.parquet",
definition, definitionSegment, false, false);
loadData(generatedDataBasePath, "", "parquetTypes.parquet",
definition, definitionSegment, false, false);
+ loadData(generatedDataBasePath, "", "friends.parquet", definition,
definitionSegment, false, false);
Collection<File> files =
IoUtil.getMatchingFiles(Paths.get(generatedDataBasePath +
"/external-filter"), PARQUET_FILTER);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
new file mode 100644
index 0000000..a601a8d
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Field access pushdown
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*friends.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
new file mode 100644
index 0000000..e72d412
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT t.r.g, `field-access-by-name`(t.r.p, x.y.age_field)
+FROM ParquetDataset;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
new file mode 100644
index 0000000..d15ba8d
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT t.r.g, `field-access-by-name`(t.r.p, x.y.age_field)
+FROM ParquetDataset;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
new file mode 100644
index 0000000..c985a25
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
@@ -0,0 +1 @@
+"distribute result [$$25] [cardinality: 0.0, op-cost: 0.0, total-cost:
0.0]\n-- DISTRIBUTE_RESULT |PARTITIONED|\n exchange [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]\n -- ONE_TO_ONE_EXCHANGE |PARTITIONED|\n
assign [$$25] <- [{\"g\": $$26.getField(\"g\"), \"$1\":
$$26.getField(\"p\").getField(\"$$ParquetDataset.getField(\"x\").getField(\"y\").getField(\"age_field\")\")}]
project: [$$25] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]\n --
ASSIGN |PARTITIONED|\n assign [$$26] <-
[$$ParquetDataset.getField(\"t\").getField(\"r\")] [cardinality: 0.0, op-cost:
0.0, total-cost: 0.0]\n -- ASSIGN |PARTITIONED|\n exchange
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]\n --
ONE_TO_ONE_EXCHANGE |PARTITIONED|\n data-scan []<-[$$ParquetDataset]
<- test.ParquetDataset project ({t:{r:{p:any,g:any}},x:{y:{age_field:any}}})
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]\n -- DATASOURCE_SCAN
|PARTITIONED|\n exchange [cardinality: 0.0, op-cost: 0.0,
total-cost: 0.0]\n -- ONE_TO_ONE_EXCHANGE |PARTITIONED|\n
empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]\n
-- EMPTY_TUPLE_SOURCE |PARTITIONED|\n"
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
new file mode 100644
index 0000000..2246335
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
@@ -0,0 +1 @@
+{ "$1": "26" }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index ff1b325..1cb294c 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -391,6 +391,12 @@
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/ASTERIXDB-3540">
+ <placeholder name="adapter" value="S3" />
+ <output-dir
compare="Clean-JSON">common/parquet/ASTERIXDB-3540</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
<compilation-unit name="common/parquet/array-access-pushdown">
<placeholder name="adapter" value="S3" />
<output-dir
compare="Text">common/parquet/array-access-pushdown</output-dir>
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19288
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: Iac55527af143c292557158ca8e47e92538e93970
Gerrit-Change-Number: 19288
Gerrit-PatchSet: 1
Gerrit-Owner: Ritik Raj <[email protected]>
Gerrit-MessageType: newchange