>From Preetham Poluparthi <[email protected]>:

Preetham Poluparthi has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21139?usp=email )


Change subject: [ASTERIXDB-3755][EXT] Improve parquet row group filters while 
querying external collections
......................................................................

[ASTERIXDB-3755][EXT] Improve parquet row group filters while querying 
external collections

- user model changes: no
- storage format changes: no
- interface changes: no

Ext-ref: MB-70714
Change-Id: I04c20ddb68b9bee8e3b92d4f90796bc57c965840
---
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.01.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.02.update.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.03.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.04.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.05.query.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.04.adm
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.05.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M 
asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ParquetFilterBuilder.java
9 files changed, 258 insertions(+), 13 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/39/21139/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.01.ddl.sqlpp
new file mode 100644
index 0000000..abff4f0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.01.ddl.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE ColumnType2 AS {
+};
+
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.02.update.sqlpp
new file mode 100644
index 0000000..28d6def
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.02.update.sqlpp
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+COPY (
+select c.*
+    from
+    [
+      {
+        "id" : 1,
+        "name" : "Alice",
+        "address" : { "street" : "100 MG Road", "city" : "Bangalore", "zip" : 
"560001" },
+        "phones" : ["9876543210", "9123456780"],
+        "orders" : [
+          { "order_id" : 101, "amount" : 250.0, "items" : ["laptop", "mouse"] 
},
+          { "order_id" : 102, "amount" : 30.5, "items" : ["keyboard"] }
+        ]
+      },
+      {
+        "id" : 2,
+        "name" : "Bob",
+        "address" : { "street" : "200 Wall St", "city" : "New York", "zip" : 
"10005" },
+        "phones" : ["2125551234"],
+        "orders" : [
+          { "order_id" : 201, "amount" : 99.99, "items" : ["monitor"] }
+        ]
+      },
+      {
+        "id" : 3,
+        "name" : "Charlie",
+        "address" : { "street" : "50 Indiranagar", "city" : "Bangalore", "zip" 
: "560038" },
+        "phones" : [],
+        "orders" : [
+          { "order_id" : 301, "amount" : 500.0, "items" : ["phone", "case", 
"charger"] },
+          { "order_id" : 302, "amount" : 15.0, "items" : ["cable"] },
+          { "order_id" : 303, "amount" : 75.0, "items" : ["headphones", 
"adapter"] }
+        ]
+      },
+      {
+        "id" : 4,
+        "name" : "Diana",
+        "address" : { "street" : "10 Downing St", "city" : "London", "zip" : 
"SW1A 2AA" },
+        "phones" : ["4420712345", "4420798765"],
+        "orders" : []
+      }
+    ] c
+
+
+    ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-nested-fields")
+WITH {
+    %template_colons%,
+    %additionalProperties%
+    "format":"parquet",
+    "version" : "2"
+};
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.03.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.03.ddl.sqlpp
new file mode 100644
index 0000000..826d052
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.03.ddl.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING %adapter%
+(
+  %template%,
+  %additional_Properties%
+  ("definition"="%path_prefix%copy-to-result/parquet-nested-fields"),
+  ("format" = "parquet"),
+  ("requireVersionChangeDetection"="false"),
+  ("include"="*.parquet")
+);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.04.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.04.query.sqlpp
new file mode 100644
index 0000000..5707636
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.04.query.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+/*
+ *  This test is to make sure
+ *   row group filters are applied correctly while reading parquet files.
+ *  Logs should not contain "Error creating Parquet row-group filter 
expression"
+ */
+
+SELECT c.*
+FROM DatasetCopy c
+where c.address.city = "Bangalore"
+and "560001" = c.zip
+order by c.id ;
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.05.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.05.query.sqlpp
new file mode 100644
index 0000000..06c297a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-nested-fields/parquet-nested-fields.05.query.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+/*
+ *  This test is to make sure
+ *  row group filters are applied correctly while reading parquet files.
+ *  Logs should not contain "Error creating Parquet row-group filter 
expression"
+ *
+ */
+
+
+
+SELECT c.id, c.name , o.order_id, o.amount, o.items
+FROM DatasetCopy c
+UNNEST c.orders o
+where c.address.city = "Bangalore" and o.order_id = 101
+order by c.id , o.order_id;
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.04.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.04.adm
new file mode 100644
index 0000000..5a1508e
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.04.adm
@@ -0,0 +1,2 @@
+{ "address": { "zip": "560001", "city": "Bangalore", "street": "100 MG Road" 
}, "name": "Alice", "phones": [ "9876543210", "9123456780" ], "orders": [ { 
"amount": 250.0, "order_id": 101, "items": [ "laptop", "mouse" ] }, { "amount": 
30.5, "order_id": 102, "items": [ "keyboard" ] } ], "id": 1 }
+{ "address": { "zip": "560038", "city": "Bangalore", "street": "50 
Indiranagar" }, "name": "Charlie", "phones": [  ], "orders": [ { "amount": 
500.0, "order_id": 301, "items": [ "phone", "case", "charger" ] }, { "amount": 
15.0, "order_id": 302, "items": [ "cable" ] }, { "amount": 75.0, "order_id": 
303, "items": [ "headphones", "adapter" ] } ], "id": 3 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.05.adm
new file mode 100644
index 0000000..0a463a2
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-nested-fields/parquet-nested-fields.05.adm
@@ -0,0 +1 @@
+{ "id": 1, "name": "Alice", "order_id": 101, "amount": 250.0, "items": [ 
"laptop", "mouse" ] }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 09743fe..6023fd7 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -80,6 +80,16 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to">
+      <compilation-unit name="parquet-nested-fields">
+        <placeholder name="adapter" value="S3" />
+        <placeholder name="pathprefix" value="" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additionalProperties" 
value='"container":"playground",' />
+        <placeholder name="additional_Properties" 
value='("container"="playground"),' />
+        <output-dir compare="Text">parquet-nested-fields</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="copy-to">
       <compilation-unit name="parquet-null1">
         <placeholder name="adapter" value="S3" />
         <placeholder name="pathprefix" value="" />
diff --git 
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ParquetFilterBuilder.java
 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ParquetFilterBuilder.java
index 6cdef59..0130756 100644
--- 
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ParquetFilterBuilder.java
+++ 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/ParquetFilterBuilder.java
@@ -74,9 +74,20 @@
         return parquetFilterPredicate;
     }

-    private FilterPredicate createComparisonExpression(ILogicalExpression 
columnName, ILogicalExpression constValue,
+    private FilterPredicate createComparisonExpression(ILogicalExpression 
arg1, ILogicalExpression arg2,
             FunctionIdentifier fid) throws AlgebricksException {
-        ConstantExpression constExpr = (ConstantExpression) constValue;
+        ILogicalExpression columnName; // the non-constant operand of the comparison
+        ConstantExpression constExpr; // the constant operand, whichever side it appears on
+        if (arg1.getExpressionTag().equals(LogicalExpressionTag.CONSTANT)) {
+            constExpr = (ConstantExpression) arg1;
+            columnName = arg2; // NOTE(review): operands are swapped but fid is not mirrored; confirm lt/gt/le/ge stay correct
+        } else if 
(arg2.getExpressionTag().equals(LogicalExpressionTag.CONSTANT)) {
+            constExpr = (ConstantExpression) arg2;
+            columnName = arg1; // was "constExpr": the column must be the other (non-constant) operand
+        } else {
+            throw new RuntimeException("Unsupported filter expression type");
+        }
+
         if (constExpr.getValue().isNull() || constExpr.getValue().isMissing()) 
{
             throw new RuntimeException("Unsupported literal type: " + 
constExpr.getValue());
         }
@@ -136,7 +147,7 @@
         }
         List<Mutable<ILogicalExpression>> args = funcExpr.getArguments();
         if (fid.equals(AlgebricksBuiltinFunctions.AND) || 
fid.equals(AlgebricksBuiltinFunctions.OR)) {
-            return createAndOrPredicate(fid, args, 0);
+            return createAndOrPredicate(fid, args, 0, args.size());
         } else {
             return createComparisonExpression(args.get(0).getValue(), 
args.get(1).getValue(), fid);
         }
@@ -192,23 +203,40 @@

     // Converts or(pred1, pred2, pred3) to or(pred1, or(pred2, pred3))
     private FilterPredicate createAndOrPredicate(FunctionIdentifier function, 
List<Mutable<ILogicalExpression>> args,
-            int index) throws AlgebricksException {
-        if (index == args.size() - 2) {
+            int leftInclusive, int rightExclusive) throws AlgebricksException {
+        if (rightExclusive - leftInclusive == 1) {
+            return createLeafFilterPredicate(args.get(leftInclusive));
+        } else if (rightExclusive - leftInclusive == 2) {
+            FilterPredicate left = 
createLeafFilterPredicate(args.get(leftInclusive)),
+                    right = createLeafFilterPredicate(args.get(leftInclusive + 
1));
             if (function.equals(AlgebricksBuiltinFunctions.AND)) {
-                return 
FilterApi.and(createFilterExpression(args.get(0).getValue()),
-                        createFilterExpression(args.get(1).getValue()));
+                return FilterApi.and(left, right);
             } else {
-                return 
FilterApi.or(createFilterExpression(args.get(0).getValue()),
-                        createFilterExpression(args.get(1).getValue()));
+                return FilterApi.or(left, right);
             }
         } else {
+            int middle = (leftInclusive + rightExclusive) / 2;
+            FilterPredicate left = createAndOrPredicate(function, args, 
leftInclusive, middle),
+                    right = createAndOrPredicate(function, args, middle, 
rightExclusive);
             if (function.equals(AlgebricksBuiltinFunctions.AND)) {
-                return 
FilterApi.and(createFilterExpression(args.get(index).getValue()),
-                        createAndOrPredicate(function, args, index + 1));
+                return FilterApi.and(left, right);
             } else {
-                return 
FilterApi.or(createFilterExpression(args.get(index).getValue()),
-                        createAndOrPredicate(function, args, index + 1));
+                return FilterApi.or(left, right);
             }
         }
     }
+
+    private FilterPredicate 
createLeafFilterPredicate(Mutable<ILogicalExpression> expression)
+            throws AlgebricksException {
+        if (expression.get().getExpressionTag() == 
LogicalExpressionTag.FUNCTION_CALL) {
+            AbstractFunctionCallExpression functionCall = 
(AbstractFunctionCallExpression) expression.get();
+            if (functionCall.getArguments().size() != 2) {
+                throw new RuntimeException("Error creating Filter for 
functions with arguments size other than 2");
+            }
+            return 
createComparisonExpression(functionCall.getArguments().get(0).get(),
+                    functionCall.getArguments().get(1).get(), 
functionCall.getFunctionIdentifier());
+        } else {
+            throw new RuntimeException("Unsupported expression: " + 
expression.get());
+        }
+    }
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21139?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: I04c20ddb68b9bee8e3b92d4f90796bc57c965840
Gerrit-Change-Number: 21139
Gerrit-PatchSet: 1
Gerrit-Owner: Preetham Poluparthi <[email protected]>

Reply via email to