zabetak commented on code in PR #3229: URL: https://github.com/apache/hive/pull/3229#discussion_r857661697
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQueryVisitor.java: ########## @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexVisitorImpl; + +public class HiveSubQueryVisitor extends RelVisitor { Review Comment: Do we really need this class? Can't we somehow exploit the `SubQueryRemoveRule`? If we need this then probably we want to add some basic javadoc. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQueryVisitor.java: ########## @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexVisitorImpl; + +public class HiveSubQueryVisitor extends RelVisitor { + + @Override + public void visit(RelNode node, int ordinal, RelNode parent) { + if (node instanceof Filter) { + visit((Filter) node); + } else if (node instanceof Project) { + visit((Project) node); + } + Review Comment: Why do we need to focus only on Filter/Project ? Why not subqueries in `Join` or elsewhere? Can't we use the `RelNode#accept(RexShuttle)` for more uniform access? ########## ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q: ########## @@ -0,0 +1,25 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.materializedview.rewriting=false; + +create table t1(col0 int) STORED AS ORC + TBLPROPERTIES ('transactional'='true'); + +create table t2(col0 int) STORED AS ORC + TBLPROPERTIES ('transactional'='true'); + +insert into t1(col0) values (1), (NULL); +insert into t2(col0) values (1), (2), (3), (NULL); + +create materialized view mat1 as +select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2; + +-- View can be used -> rewrite +explain cbo +select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2); Review Comment: Does it make sense to include also MV rewrite tests with subquery in project and join? ########## ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java: ########## @@ -2347,22 +2348,49 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv return basePlan; } + /** + * Traverse the plan and collect table names from {@link TableScan} operators + * Use this method if plan does not have any sub query. + * Use {@link CalcitePlannerAction#getAllTablesUsed(RelNode)} to include sub-query expressions. + * @see HiveSubQueryRemoveRule + */ protected Set<TableName> getTablesUsed(RelNode plan) { Set<TableName> tablesUsed = new HashSet<>(); new RelVisitor() { @Override public void visit(RelNode node, int ordinal, RelNode parent) { - if (node instanceof TableScan) { - TableScan ts = (TableScan) node; - Table hiveTableMD = ((RelOptHiveTable) ts.getTable()).getHiveTableMD(); - tablesUsed.add(hiveTableMD.getFullTableName()); - } + addUsedTable(node, tablesUsed); super.visit(node, ordinal, parent); } }.go(plan); return tablesUsed; } + /** + * Traverse the plan including sub-query expressions and collect table names from {@link TableScan} operators. + */ + protected Set<TableName> getAllTablesUsed(RelNode plan) { Review Comment: Do we need to distinguish if we are going to look in sub-queries or not? If yes can't simply add a boolean parameter with an intuitive name instead of adding a new method (e.g., `includeSubqueries`, `expandSubqueries`)? ########## ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java: ########## @@ -2347,22 +2348,49 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv return basePlan; } + /** + * Traverse the plan and collect table names from {@link TableScan} operators + * Use this method if plan does not have any sub query. + * Use {@link CalcitePlannerAction#getAllTablesUsed(RelNode)} to include sub-query expressions. + * @see HiveSubQueryRemoveRule + */ protected Set<TableName> getTablesUsed(RelNode plan) { Set<TableName> tablesUsed = new HashSet<>(); new RelVisitor() { @Override public void visit(RelNode node, int ordinal, RelNode parent) { - if (node instanceof TableScan) { - TableScan ts = (TableScan) node; - Table hiveTableMD = ((RelOptHiveTable) ts.getTable()).getHiveTableMD(); - tablesUsed.add(hiveTableMD.getFullTableName()); - } + addUsedTable(node, tablesUsed); super.visit(node, ordinal, parent); } }.go(plan); return tablesUsed; } + /** + * Traverse the plan including sub-query expressions and collect table names from {@link TableScan} operators. + */ + protected Set<TableName> getAllTablesUsed(RelNode plan) { + Set<TableName> tablesUsed = new HashSet<>(); + new HiveSubQueryVisitor() { Review Comment: Instead of creating new visitors wouldn't make sense to apply the `SubQueryRemoveRule` either inside this method or before? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
