[ https://issues.apache.org/jira/browse/HIVE-24274?focusedWorklogId=522496&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-522496 ]
ASF GitHub Bot logged work on HIVE-24274: ----------------------------------------- Author: ASF GitHub Bot Created on: 10/Dec/20 01:17 Start Date: 10/Dec/20 01:17 Worklog Time Spent: 10m Work Description: jcamachor commented on a change in pull request #1706: URL: https://github.com/apache/hive/pull/1706#discussion_r539730309 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java ########## @@ -57,7 +57,7 @@ public void analyzeInternal(ASTNode root) throws SemanticException { ASTNode tableTree = (ASTNode) root.getChild(0); TableName tableName = getQualifiedTableName(tableTree); - if (ctx.enableUnparse()) { + if (ctx.isScheduledQuery()) { unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase()); Review comment: Can we add a comment (I know that the code was not added in this patch but it is useful to have some clarification on why this is being done)? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/Context.java ########## @@ -336,6 +344,9 @@ private Context(Configuration conf, String executionId) { opContext = new CompilationOpContext(); viewsTokenRewriteStreams = new HashMap<>(); + enableUnparse = Review comment: Can we add a comment why we are only enabling this when this config value is true? `enableUnparse` documentation has a description on why it is not enabled in general. However, it is worth having a comment here, since it is difficult to establish the connection between the config property and the variable. ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ########## @@ -1945,6 +1945,18 @@ public RelOptMaterialization getMaterializedViewForRebuild(String dbName, String } } + public List<RelOptMaterialization> getMaterialization( Review comment: add javadoc? Also, should this method be renamed to `getSQLMatchingMaterializedView` or anything more descriptive? ########## File path: ql/src/test/queries/clientpositive/materialized_view_create_rewrite.q ########## @@ -5,6 +5,7 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.materializedview.rewriting=true; +set hive.materializedview.rewriting.query.text=false; Review comment: Why do we disable it here? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewsCache.java ########## @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.BiFunction; + +import static java.util.Collections.emptyList; +import static java.util.Collections.unmodifiableList; + +/** + * Collection for storing {@link RelOptMaterialization}s. + * RelOptMaterialization can be lookup by + * - the Materialized View fully qualified name + * - query text. + * This implementation contains two {@link ConcurrentHashMap} one for name based and one for query text based lookup. + * The map contents are synchronized during each dml operation: Dml operations are performed initially on the map + * which provides name based lookup. The map which provides query text based lookup is updated by lambda expressions + * passed to {@link ConcurrentHashMap#compute(Object, BiFunction)}. + */ +public class MaterializedViewsCache { + private static final Logger LOG = LoggerFactory.getLogger(MaterializedViewsCache.class); + + // Key is the database name. Value a map from the qualified name to the view object. + private final ConcurrentMap<String, ConcurrentMap<String, RelOptMaterialization>> materializedViews = + new ConcurrentHashMap<>(); + // Map for looking up materialization by view query text + private final Map<String, List<RelOptMaterialization>> sqlToMaterializedView = new ConcurrentHashMap<>(); + + + public void putIfAbsent(Table materializedViewTable, RelOptMaterialization materialization) { + ConcurrentMap<String, RelOptMaterialization> dbMap = ensureDbMap(materializedViewTable); + + // You store the materialized view + dbMap.compute(materializedViewTable.getTableName(), (mvTableName, relOptMaterialization) -> { + List<RelOptMaterialization> materializationList = sqlToMaterializedView.computeIfAbsent( + materializedViewTable.getViewExpandedText().toLowerCase(), s -> new ArrayList<>()); + materializationList.add(materialization); + return materialization; + }); + + LOG.debug("Materialized view {}.{} added to registry", + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + private ConcurrentMap<String, RelOptMaterialization> ensureDbMap(Table materializedViewTable) { + // We are going to create the map for each view in the given database + ConcurrentMap<String, RelOptMaterialization> dbMap = + new ConcurrentHashMap<String, RelOptMaterialization>(); + // If we are caching the MV, we include it in the cache + final ConcurrentMap<String, RelOptMaterialization> prevDbMap = materializedViews.putIfAbsent( + materializedViewTable.getDbName(), dbMap); + if (prevDbMap != null) { + dbMap = prevDbMap; + } + return dbMap; + } + + public void refresh( + Table oldMaterializedViewTable, Table materializedViewTable, RelOptMaterialization newMaterialization) { + ConcurrentMap<String, RelOptMaterialization> dbMap = ensureDbMap(materializedViewTable); + + dbMap.compute(materializedViewTable.getTableName(), (mvTableName, existingMaterialization) -> { + List<RelOptMaterialization> optMaterializationList = sqlToMaterializedView.computeIfAbsent( + materializedViewTable.getViewExpandedText().toLowerCase(), s -> new ArrayList<>()); + + if (existingMaterialization == null) { + // If it was not existing, we just create it + optMaterializationList.add(newMaterialization); + return newMaterialization; + } + Table existingMaterializedViewTable = HiveMaterializedViewUtils.extractTable(existingMaterialization); + if (existingMaterializedViewTable.equals(oldMaterializedViewTable)) { + // If the old version is the same, we replace it + optMaterializationList.remove(existingMaterialization); + optMaterializationList.add(newMaterialization); + return newMaterialization; + } + // Otherwise, we return existing materialization + return existingMaterialization; + }); + + LOG.debug("Refreshed materialized view {}.{} -> {}.{}", + oldMaterializedViewTable.getDbName(), oldMaterializedViewTable.getTableName(), + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + public void remove(Table materializedViewTable) { + ConcurrentMap<String, RelOptMaterialization> dbMap = materializedViews.get(materializedViewTable.getDbName()); + if (dbMap != null) { + // Delete only if the create time for the input materialized view table and the table + // in the map match. Otherwise, keep the one in the map. + dbMap.computeIfPresent(materializedViewTable.getTableName(), (mvTableName, oldMaterialization) -> { + if (HiveMaterializedViewUtils.extractTable(oldMaterialization).equals(materializedViewTable)) { + List<RelOptMaterialization> materializationList = + sqlToMaterializedView.get(materializedViewTable.getViewExpandedText().toLowerCase()); + materializationList.remove(oldMaterialization); + return null; + } + return oldMaterialization; + }); + } + + LOG.debug("Materialized view {}.{} removed from registry", + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + public void remove(String dbName, String tableName) { + ConcurrentMap<String, RelOptMaterialization> dbMap = materializedViews.get(dbName); + if (dbMap != null) { + dbMap.computeIfPresent(tableName, (mvTableName, relOptMaterialization) -> { + String queryText = HiveMaterializedViewUtils.extractTable(relOptMaterialization).getViewExpandedText(); + List<RelOptMaterialization> materializationList = sqlToMaterializedView.get(queryText.toLowerCase()); + materializationList.remove(relOptMaterialization); + return null; + }); + + LOG.debug("Materialized view {}.{} removed from registry", dbName, tableName); + } + } + + public List<RelOptMaterialization> values() { + List<RelOptMaterialization> result = new ArrayList<>(); + materializedViews.forEach((dbName, mvs) -> result.addAll(mvs.values())); + return unmodifiableList(result); + } + + RelOptMaterialization get(String dbName, String viewName) { + if (materializedViews.get(dbName) != null) { + LOG.debug("Found materialized view {}.{} in registry", dbName, viewName); + return materializedViews.get(dbName).get(viewName); + } + LOG.debug("Materialized view {}.{} not found in registry", dbName, viewName); + return null; + } + + public List<RelOptMaterialization> get(String queryText) { + List<RelOptMaterialization> relOptMaterializationList = sqlToMaterializedView.get(queryText.toLowerCase()); + if (relOptMaterializationList == null) { + LOG.debug("No materialized view with query text '{}' found in registry", queryText); Review comment: This will print the full query which will be very verbose. Maybe move printing the query text to TRACE level and simply print `No materialized view with similar query text found in registry` in DEBUG mode. ########## File path: ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out ########## @@ -1047,6 +1047,7 @@ PREHOOK: query: select program from tv_view_data PREHOOK: type: QUERY +PREHOOK: Input: arc_view@mv_tv_view_data_av2 Review comment: Is this the new test that you had added to verify that the casting is matching correctly now? ########## File path: ql/src/test/queries/clientnegative/materialized_view_no_cbo_rewrite.q ########## @@ -1,11 +0,0 @@ -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.strict.checks.cartesian.product=false; -set hive.materializedview.rewriting=true; - -create table cmv_basetable (a int, b varchar(256), c decimal(10,2)) -stored as orc TBLPROPERTIES ('transactional'='true'); - -insert into cmv_basetable values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8); - -create materialized view cmv_mat_view as select a, b, c from cmv_basetable sort by a; Review comment: Would this still fail for queries that do not go through CBO due to missing support for some features/constructs? We do not have many of those queries left but maybe we should add the negative test (if it is not already there) ########## File path: ql/src/test/queries/clientpositive/materialized_view_create_rewrite_dummy.q ########## @@ -6,6 +6,7 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.materializedview.rewriting=true; +set hive.materializedview.rewriting.query.text=false; Review comment: Why is this disabled now? Does the test change? ########## File path: ql/src/test/queries/clientpositive/materialized_view_create_rewrite_multi_db.q ########## @@ -3,6 +3,7 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.materializedview.rewriting=true; +set hive.materializedview.rewriting.query.text=false; Review comment: Why is this disabled now? Does the test change? ########## File path: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ########## @@ -1844,6 +1844,9 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal // materialized views HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING("hive.materializedview.rewriting", true, "Whether to try to rewrite queries using the materialized views enabled for rewriting"), + HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_QUERY_TEXT("hive.materializedview.rewriting.query.text", true, Review comment: Let's make it shorter: `hive.materializedview.rewriting.query.text` -> `hive.materializedview.rewriting.text` or `hive.materializedview.rewriting.sql` ? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ########## @@ -345,9 +345,19 @@ import javax.sql.DataSource; +import static java.util.Collections.singletonList; +import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils.extractTable; + public class CalcitePlanner extends SemanticAnalyzer { + /** + * {@link org.antlr.runtime.TokenRewriteStream} offers the opportunity of multiple rewrites of the same + * input text (in our case the sql query text). These rewrites are called programs and identified by a string. + * EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM is for identifying the program which replaces all identifiers in the + * query with fully qualified identifiers. + */ + public static final String EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM = "EXPANDED_QUERY_PROGRAM"; Review comment: could this be `private`? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewsCache.java ########## @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.BiFunction; + +import static java.util.Collections.emptyList; +import static java.util.Collections.unmodifiableList; + +/** + * Collection for storing {@link RelOptMaterialization}s. + * RelOptMaterialization can be lookup by + * - the Materialized View fully qualified name + * - query text. + * This implementation contains two {@link ConcurrentHashMap} one for name based and one for query text based lookup. + * The map contents are synchronized during each dml operation: Dml operations are performed initially on the map + * which provides name based lookup. The map which provides query text based lookup is updated by lambda expressions + * passed to {@link ConcurrentHashMap#compute(Object, BiFunction)}. + */ +public class MaterializedViewsCache { + private static final Logger LOG = LoggerFactory.getLogger(MaterializedViewsCache.class); + + // Key is the database name. Value a map from the qualified name to the view object. + private final ConcurrentMap<String, ConcurrentMap<String, RelOptMaterialization>> materializedViews = + new ConcurrentHashMap<>(); + // Map for looking up materialization by view query text + private final Map<String, List<RelOptMaterialization>> sqlToMaterializedView = new ConcurrentHashMap<>(); + + + public void putIfAbsent(Table materializedViewTable, RelOptMaterialization materialization) { + ConcurrentMap<String, RelOptMaterialization> dbMap = ensureDbMap(materializedViewTable); + + // You store the materialized view + dbMap.compute(materializedViewTable.getTableName(), (mvTableName, relOptMaterialization) -> { + List<RelOptMaterialization> materializationList = sqlToMaterializedView.computeIfAbsent( + materializedViewTable.getViewExpandedText().toLowerCase(), s -> new ArrayList<>()); + materializationList.add(materialization); + return materialization; + }); + + LOG.debug("Materialized view {}.{} added to registry", + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + private ConcurrentMap<String, RelOptMaterialization> ensureDbMap(Table materializedViewTable) { + // We are going to create the map for each view in the given database + ConcurrentMap<String, RelOptMaterialization> dbMap = + new ConcurrentHashMap<String, RelOptMaterialization>(); + // If we are caching the MV, we include it in the cache + final ConcurrentMap<String, RelOptMaterialization> prevDbMap = materializedViews.putIfAbsent( + materializedViewTable.getDbName(), dbMap); + if (prevDbMap != null) { + dbMap = prevDbMap; + } + return dbMap; + } + + public void refresh( + Table oldMaterializedViewTable, Table materializedViewTable, RelOptMaterialization newMaterialization) { + ConcurrentMap<String, RelOptMaterialization> dbMap = ensureDbMap(materializedViewTable); + + dbMap.compute(materializedViewTable.getTableName(), (mvTableName, existingMaterialization) -> { + List<RelOptMaterialization> optMaterializationList = sqlToMaterializedView.computeIfAbsent( + materializedViewTable.getViewExpandedText().toLowerCase(), s -> new ArrayList<>()); + + if (existingMaterialization == null) { + // If it was not existing, we just create it + optMaterializationList.add(newMaterialization); + return newMaterialization; + } + Table existingMaterializedViewTable = HiveMaterializedViewUtils.extractTable(existingMaterialization); + if (existingMaterializedViewTable.equals(oldMaterializedViewTable)) { + // If the old version is the same, we replace it + optMaterializationList.remove(existingMaterialization); + optMaterializationList.add(newMaterialization); + return newMaterialization; + } + // Otherwise, we return existing materialization + return existingMaterialization; + }); + + LOG.debug("Refreshed materialized view {}.{} -> {}.{}", + oldMaterializedViewTable.getDbName(), oldMaterializedViewTable.getTableName(), + materializedViewTable.getDbName(), materializedViewTable.getTableName()); + } + + public void remove(Table materializedViewTable) { + ConcurrentMap<String, RelOptMaterialization> dbMap = materializedViews.get(materializedViewTable.getDbName()); + if (dbMap != null) { + // Delete only if the create time for the input materialized view table and the table + // in the map match. Otherwise, keep the one in the map. + dbMap.computeIfPresent(materializedViewTable.getTableName(), (mvTableName, oldMaterialization) -> { + if (HiveMaterializedViewUtils.extractTable(oldMaterialization).equals(materializedViewTable)) { + List<RelOptMaterialization> materializationList = + sqlToMaterializedView.get(materializedViewTable.getViewExpandedText().toLowerCase()); Review comment: We should not call `toLowerCase`. For instance, it would transform literal values that should be different and then consider them equal, e.g., col1='AaAa' vs col1='Aaaa'. In addition to applying the change, can you add a test for this case to see that rewriting is triggered vs not-triggered? ########## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ########## @@ -2349,6 +2362,50 @@ private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba return basePlan; } + private RelOptMaterialization copyMaterializationToNewCluster(RelOptCluster optCluster, RelOptMaterialization materialization) { Review comment: Can we move this to _HiveMaterializedViewsUtils.java_? I think there is a single other place where we call _ copyNodeNewCluster_ and it's to create a new materialization too. Maybe you can make _ copyNodeNewCluster_ private, make _ copyMaterializationToNewCluster_ in _HiveMaterializedViewsUtils.java_, and only call the latter. ########## File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ########## @@ -1945,6 +1945,18 @@ public RelOptMaterialization getMaterializedViewForRebuild(String dbName, String } } + public List<RelOptMaterialization> getMaterialization( + String queryString, List<String> tablesUsed, HiveTxnManager txnMgr) throws HiveException { + + List<RelOptMaterialization> materializedViews = + HiveMaterializedViewsRegistry.get().getRewritingMaterializedViews(queryString); + if (materializedViews.isEmpty()) { + return Collections.emptyList(); + } + + return filterAugmentMaterializedViews(materializedViews, tablesUsed, txnMgr); Review comment: _filterAugmentMaterializedViews_ contains logic that is relevant to incremental computation of MVs, as well as other logic that works specifically for certain plan patterns. For instance, I am not sure we should be calling _deriveGroupingSetsMaterializedViews_ for textual queries, e.g., whether it could lead to incorrect results. Instead, should we call `getValidMaterializedViews(materializedViews, tablesUsed, false, false, txnMgr)`? The caveat is that you will also need to create a method ``` private List<RelOptMaterialization> getValidMaterializedViews(List<Table> materializedViewTables, List<String> tablesUsed, boolean forceMVContentsUpToDate, *boolean tryIncrementalRewriting*, boolean expandGroupingSets, HiveTxnManager txnMgr) throws HiveException { ``` For the former calls, you can pass the value of the property `HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL)` for `tryIncrementalRewriting`. For the new call from this method, you should pass `false`. This will effectively lead to ignoring the MVs if they are outdated. ########## File path: ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_2.q.out ########## @@ -0,0 +1,334 @@ +PREHOOK: query: create table cmv_basetable_n0 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_basetable_n0 +POSTHOOK: query: create table cmv_basetable_n0 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_basetable_n0 +PREHOOK: query: insert into cmv_basetable_n0 values + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@cmv_basetable_n0 +POSTHOOK: query: insert into cmv_basetable_n0 values + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@cmv_basetable_n0 +POSTHOOK: Lineage: cmv_basetable_n0.a SCRIPT [] +POSTHOOK: Lineage: cmv_basetable_n0.b SCRIPT [] +POSTHOOK: Lineage: cmv_basetable_n0.c SCRIPT [] +POSTHOOK: Lineage: cmv_basetable_n0.d SCRIPT [] +PREHOOK: query: create materialized view cmv_mat_view_n0 +as select a, b, c from cmv_basetable_n0 where a = 2 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable_n0 +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view_n0 +POSTHOOK: query: create materialized view cmv_mat_view_n0 +as select a, b, c from cmv_basetable_n0 where a = 2 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable_n0 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view_n0 +PREHOOK: query: select * from cmv_mat_view_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_mat_view_n0 +#### A masked pattern was here #### +POSTHOOK: query: select * from cmv_mat_view_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_mat_view_n0 +#### A masked pattern was here #### +2 bob 3.14 +2 bonnie 172342.20 +PREHOOK: query: show tblproperties cmv_mat_view_n0 +PREHOOK: type: SHOW_TBLPROPERTIES +POSTHOOK: query: show tblproperties cmv_mat_view_n0 +POSTHOOK: type: SHOW_TBLPROPERTIES +COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true","c":"true"}} +bucketing_version 2 +numFiles 1 +numFilesErasureCoded 0 +numRows 2 +rawDataSize 408 +totalSize 468 +#### A masked pattern was here #### +PREHOOK: query: create materialized view if not exists cmv_mat_view2 +as select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable_n0 +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: create materialized view if not exists cmv_mat_view2 +as select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable_n0 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view2 +PREHOOK: query: select * from cmv_mat_view2 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +POSTHOOK: query: select * from cmv_mat_view2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +3 9.80 +3 978.76 +PREHOOK: query: show tblproperties cmv_mat_view2 +PREHOOK: type: SHOW_TBLPROPERTIES +POSTHOOK: query: show tblproperties cmv_mat_view2 +POSTHOOK: type: SHOW_TBLPROPERTIES +COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","c":"true"}} +bucketing_version 2 +numFiles 1 +numFilesErasureCoded 0 +numRows 2 +rawDataSize 232 +totalSize 334 +#### A masked pattern was here #### +PREHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +PREHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +POSTHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.cmv_mat_view2 + Select Operator + expressions: a (type: int), c (type: decimal(10,2)) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +PREHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +POSTHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +3 9.80 +3 978.76 +PREHOOK: query: explain +alter materialized view cmv_mat_view2 disable rewrite +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +PREHOOK: Input: default@cmv_mat_view2 +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: explain +alter materialized view cmv_mat_view2 disable rewrite +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +POSTHOOK: Input: default@cmv_mat_view2 +POSTHOOK: Output: default@cmv_mat_view2 +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Alter Materialized View Rewrite + name: default.cmv_mat_view2 + disable: true + + Stage: Stage-1 + Materialized View Update + name: default.cmv_mat_view2 + disable rewrite: true + +PREHOOK: query: alter materialized view cmv_mat_view2 disable rewrite +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +PREHOOK: Input: default@cmv_mat_view2 +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: alter materialized view cmv_mat_view2 disable rewrite +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +POSTHOOK: Input: default@cmv_mat_view2 +POSTHOOK: Output: default@cmv_mat_view2 +PREHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cmv_basetable_n0 + filterExpr: (a = 3) (type: boolean) + Filter Operator + predicate: (a = 3) (type: boolean) + Select Operator + expressions: 3 (type: int), c (type: decimal(10,2)) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +3 9.80 +3 978.76 +PREHOOK: query: explain +alter materialized view cmv_mat_view2 enable rewrite +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +PREHOOK: Input: default@cmv_mat_view2 +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: explain +alter materialized view cmv_mat_view2 enable rewrite +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +POSTHOOK: Input: default@cmv_mat_view2 +POSTHOOK: Output: default@cmv_mat_view2 +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Alter Materialized View Rewrite + name: default.cmv_mat_view2 + enable: true + + Stage: Stage-1 + Materialized View Update + name: default.cmv_mat_view2 + retrieve and include: true + +PREHOOK: query: alter materialized view cmv_mat_view2 enable rewrite +PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +PREHOOK: Input: default@cmv_mat_view2 +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +POSTHOOK: Input: default@cmv_mat_view2 +POSTHOOK: Output: default@cmv_mat_view2 +PREHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +PREHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +POSTHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: default.cmv_mat_view2 + Select Operator + expressions: a (type: int), c (type: decimal(10,2)) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +PREHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +POSTHOOK: Input: default@cmv_mat_view2 +#### A masked pattern was here #### +3 9.80 +3 978.76 +PREHOOK: query: drop materialized view cmv_mat_view2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view2 +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: drop materialized view cmv_mat_view2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view2 +POSTHOOK: Output: default@cmv_mat_view2 +PREHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain +select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: cmv_basetable_n0 + filterExpr: (a = 3) (type: boolean) + Filter Operator + predicate: (a = 3) (type: boolean) + Select Operator + expressions: 3 (type: int), c (type: decimal(10,2)) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable_n0 +#### A masked pattern was here #### +3 9.80 +3 978.76 +PREHOOK: query: drop materialized view cmv_mat_view_n0 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view_n0 +PREHOOK: Output: default@cmv_mat_view_n0 +POSTHOOK: query: drop materialized view cmv_mat_view_n0 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view_n0 +POSTHOOK: Output: default@cmv_mat_view_n0 Review comment: Can we also add some tests with inserts on the based tables (i.e., the MVs become outdated and are not used), then rebuild, then verify the MV is used again? You can see different such scenarios in current `materialized_view*` tests. ########## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ########## @@ -12565,15 +12566,17 @@ void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticE sinkOp = genOPTree(ast, plannerCtx); boolean usesMasking = false; Review comment: Can you add a few masking tests that match / do not match based on exact SQL text? There are positive and negative tests for current rewriting (masking_mv in both cases): https://github.com/apache/hive/commit/873d31f33a061cd38be7de91b208987871fb612e#diff-3ae6a10ec619425775eeff0c135ace87cfe2c8dfbaea22f99278d7cd5d9bc330 ########## File path: ql/src/test/org/apache/hadoop/hive/ql/metadata/TestMaterializedViewsCache.java ########## @@ -0,0 +1,525 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptQuery; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.metadata.Metadata; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Litmus; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static java.util.Collections.singletonList; +import static org.hamcrest.CoreMatchers.hasItem; +import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.Is.is; + +/** + * Functional and parallel execution tests for {@link MaterializedViewsCache}. + * Parallel execution test is disabled by default. + */ +class TestMaterializedViewsCache { Review comment: Neat, thanks for adding these tests!! ########## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ########## @@ -2349,6 +2362,50 @@ private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba return basePlan; } + private RelOptMaterialization copyMaterializationToNewCluster(RelOptCluster optCluster, RelOptMaterialization materialization) { + final RelNode viewScan = materialization.tableRel; + final RelNode newViewScan = HiveMaterializedViewUtils.copyNodeNewCluster( + optCluster, viewScan); + return new RelOptMaterialization(newViewScan, materialization.queryRel, null, + materialization.qualifiedTableName); + } + + private boolean isMaterializedViewRewritingByTextEnabled() { + return conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_QUERY_TEXT) && + mvRebuildMode == MaterializationRebuildMode.NONE && + !getQB().isMaterializedView() && !ctx.isLoadingMaterializedView() && !getQB().isCTAS() && + getQB().getIsQuery() && + getQB().hasTableDefined(); + } + + private RelNode applyMaterializedViewRewritingByText(RelNode calciteGenPlan, RelOptCluster optCluster) { + unparseTranslator.applyTranslations(ctx.getTokenRewriteStream(), EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM); + String expandedQueryText = ctx.getTokenRewriteStream() + .toString(EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM, ast.getTokenStartIndex(), ast.getTokenStopIndex()); + try { + List<RelOptMaterialization> relOptMaterializationList = db.getMaterialization( + expandedQueryText, getTablesUsed(calciteGenPlan), getTxnMgr()); + for (RelOptMaterialization relOptMaterialization : relOptMaterializationList) { + try { + Table hiveTableMD = extractTable(relOptMaterialization); + if (db.validateMaterializedViewsFromRegistry( Review comment: Another validation is needed here to confirm whether current user has access to such MV. See `HiveMaterializedViewUtils.checkPrivilegeForMaterializedViews` (https://github.com/apache/hive/commit/cf4463e2d049ceaf931d21e2ae8182f50d56b43d) Please, add a test such as the one included in the https://github.com/apache/hive/commit/cf4463e2d049ceaf931d21e2ae8182f50d56b43d . ########## File path: ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q ########## @@ -5,6 +5,7 @@ set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.stats.fetch.column.stats=true; set hive.materializedview.rewriting=true; +set hive.materializedview.rewriting.query.text=false; Review comment: Why is this disabled now? Does the test change? ########## File path: ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_2.q ########## @@ -0,0 +1,65 @@ +-- SORT_QUERY_RESULTS + +SET hive.vectorized.execution.enabled=false; +set hive.server2.materializedviews.registry.impl=DUMMY; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.materializedview.rewriting.query.text=true; Review comment: No need, this is `true` by default? ########## File path: ql/src/test/results/clientpositive/llap/avrotblsjoin.q.out ########## @@ -73,6 +73,7 @@ POSTHOOK: Lineage: table1_1.col1 SCRIPT [] POSTHOOK: Lineage: table1_1.col2 SCRIPT [] WARNING: Comparing a bigint and a string may result in a loss of precision. WARNING: Comparing a bigint and a string may result in a loss of precision. +WARNING: Comparing a bigint and a string may result in a loss of precision. Review comment: Is this expected? Same question for the same WARNING messages in q files below (I assume root cause is the same). ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 522496) Time Spent: 0.5h (was: 20m) > Implement Query Text based MaterializedView rewrite > --------------------------------------------------- > > Key: HIVE-24274 > URL: https://issues.apache.org/jira/browse/HIVE-24274 > Project: Hive > Issue Type: Improvement > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 0.5h > Remaining Estimate: 0h > > Besides the way queries are currently rewritten to use materialized views in > Hive this project provides an alternative: > Compare the query text with the materialized views query text stored. If we > found a match the original query's logical plan can be replaced by a scan on > the materialized view. > - Only materialized views which are enabled to rewrite can participate > - Use existing *HiveMaterializedViewsRegistry* through *Hive* object by > adding a lookup method by query text. > - There might be more than one materialized views which have the same query > text. In this case chose the first valid one. > - Validation can be done by calling > *Hive.validateMaterializedViewsFromRegistry()* > - The scope of this first patch is rewriting queries which entire text can be > matched only. > - Use the expanded query text (fully qualified column and table names) for > comparing -- This message was sent by Atlassian Jira (v8.3.4#803005)