jcamachor commented on a change in pull request #1706:
URL: https://github.com/apache/hive/pull/1706#discussion_r539730309
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
##########
@@ -57,7 +57,7 @@ public void analyzeInternal(ASTNode root) throws
SemanticException {
ASTNode tableTree = (ASTNode) root.getChild(0);
TableName tableName = getQualifiedTableName(tableTree);
- if (ctx.enableUnparse()) {
+ if (ctx.isScheduledQuery()) {
unparseTranslator.addTableNameTranslation(tableTree,
SessionState.get().getCurrentDatabase());
Review comment:
Can we add a comment (I know that the code was not added in this patch
but it is useful to have some clarification on why this is being done)?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/Context.java
##########
@@ -336,6 +344,9 @@ private Context(Configuration conf, String executionId) {
opContext = new CompilationOpContext();
viewsTokenRewriteStreams = new HashMap<>();
+ enableUnparse =
Review comment:
Can we add a comment why we are only enabling this when this config
value is true? `enableUnparse` documentation has a description on why it is not
enabled in general. However, it is worth having a comment here, since it is
difficult to establish the connection between the config property and the
variable.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -1945,6 +1945,18 @@ public RelOptMaterialization
getMaterializedViewForRebuild(String dbName, String
}
}
+ public List<RelOptMaterialization> getMaterialization(
Review comment:
add javadoc?
Also, should this method be renamed to `getSQLMatchingMaterializedView` or
anything more descriptive?
##########
File path: ql/src/test/queries/clientpositive/materialized_view_create_rewrite.q
##########
@@ -5,6 +5,7 @@ set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.strict.checks.cartesian.product=false;
set hive.materializedview.rewriting=true;
+set hive.materializedview.rewriting.query.text=false;
Review comment:
Why do we disable it here?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewsCache.java
##########
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.metadata;
+
+import org.apache.calcite.plan.RelOptMaterialization;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.function.BiFunction;
+
+import static java.util.Collections.emptyList;
+import static java.util.Collections.unmodifiableList;
+
+/**
+ * Collection for storing {@link RelOptMaterialization}s.
+ * RelOptMaterialization can be lookup by
+ * - the Materialized View fully qualified name
+ * - query text.
+ * This implementation contains two {@link ConcurrentHashMap} one for name
based and one for query text based lookup.
+ * The map contents are synchronized during each dml operation: Dml operations
are performed initially on the map
+ * which provides name based lookup. The map which provides query text based
lookup is updated by lambda expressions
+ * passed to {@link ConcurrentHashMap#compute(Object, BiFunction)}.
+ */
+public class MaterializedViewsCache {
+ private static final Logger LOG =
LoggerFactory.getLogger(MaterializedViewsCache.class);
+
+ // Key is the database name. Value a map from the qualified name to the view
object.
+ private final ConcurrentMap<String, ConcurrentMap<String,
RelOptMaterialization>> materializedViews =
+ new ConcurrentHashMap<>();
+ // Map for looking up materialization by view query text
+ private final Map<String, List<RelOptMaterialization>> sqlToMaterializedView
= new ConcurrentHashMap<>();
+
+
+ public void putIfAbsent(Table materializedViewTable, RelOptMaterialization
materialization) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
ensureDbMap(materializedViewTable);
+
+ // You store the materialized view
+ dbMap.compute(materializedViewTable.getTableName(), (mvTableName,
relOptMaterialization) -> {
+ List<RelOptMaterialization> materializationList =
sqlToMaterializedView.computeIfAbsent(
+ materializedViewTable.getViewExpandedText().toLowerCase(), s ->
new ArrayList<>());
+ materializationList.add(materialization);
+ return materialization;
+ });
+
+ LOG.debug("Materialized view {}.{} added to registry",
+ materializedViewTable.getDbName(),
materializedViewTable.getTableName());
+ }
+
+ private ConcurrentMap<String, RelOptMaterialization> ensureDbMap(Table
materializedViewTable) {
+ // We are going to create the map for each view in the given database
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
+ new ConcurrentHashMap<String, RelOptMaterialization>();
+ // If we are caching the MV, we include it in the cache
+ final ConcurrentMap<String, RelOptMaterialization> prevDbMap =
materializedViews.putIfAbsent(
+ materializedViewTable.getDbName(), dbMap);
+ if (prevDbMap != null) {
+ dbMap = prevDbMap;
+ }
+ return dbMap;
+ }
+
+ public void refresh(
+ Table oldMaterializedViewTable, Table materializedViewTable,
RelOptMaterialization newMaterialization) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
ensureDbMap(materializedViewTable);
+
+ dbMap.compute(materializedViewTable.getTableName(), (mvTableName,
existingMaterialization) -> {
+ List<RelOptMaterialization> optMaterializationList =
sqlToMaterializedView.computeIfAbsent(
+ materializedViewTable.getViewExpandedText().toLowerCase(), s ->
new ArrayList<>());
+
+ if (existingMaterialization == null) {
+ // If it was not existing, we just create it
+ optMaterializationList.add(newMaterialization);
+ return newMaterialization;
+ }
+ Table existingMaterializedViewTable =
HiveMaterializedViewUtils.extractTable(existingMaterialization);
+ if (existingMaterializedViewTable.equals(oldMaterializedViewTable)) {
+ // If the old version is the same, we replace it
+ optMaterializationList.remove(existingMaterialization);
+ optMaterializationList.add(newMaterialization);
+ return newMaterialization;
+ }
+ // Otherwise, we return existing materialization
+ return existingMaterialization;
+ });
+
+ LOG.debug("Refreshed materialized view {}.{} -> {}.{}",
+ oldMaterializedViewTable.getDbName(),
oldMaterializedViewTable.getTableName(),
+ materializedViewTable.getDbName(),
materializedViewTable.getTableName());
+ }
+
+ public void remove(Table materializedViewTable) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
materializedViews.get(materializedViewTable.getDbName());
+ if (dbMap != null) {
+ // Delete only if the create time for the input materialized view table
and the table
+ // in the map match. Otherwise, keep the one in the map.
+ dbMap.computeIfPresent(materializedViewTable.getTableName(),
(mvTableName, oldMaterialization) -> {
+ if
(HiveMaterializedViewUtils.extractTable(oldMaterialization).equals(materializedViewTable))
{
+ List<RelOptMaterialization> materializationList =
+
sqlToMaterializedView.get(materializedViewTable.getViewExpandedText().toLowerCase());
+ materializationList.remove(oldMaterialization);
+ return null;
+ }
+ return oldMaterialization;
+ });
+ }
+
+ LOG.debug("Materialized view {}.{} removed from registry",
+ materializedViewTable.getDbName(),
materializedViewTable.getTableName());
+ }
+
+ public void remove(String dbName, String tableName) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
materializedViews.get(dbName);
+ if (dbMap != null) {
+ dbMap.computeIfPresent(tableName, (mvTableName, relOptMaterialization)
-> {
+ String queryText =
HiveMaterializedViewUtils.extractTable(relOptMaterialization).getViewExpandedText();
+ List<RelOptMaterialization> materializationList =
sqlToMaterializedView.get(queryText.toLowerCase());
+ materializationList.remove(relOptMaterialization);
+ return null;
+ });
+
+ LOG.debug("Materialized view {}.{} removed from registry", dbName,
tableName);
+ }
+ }
+
+ public List<RelOptMaterialization> values() {
+ List<RelOptMaterialization> result = new ArrayList<>();
+ materializedViews.forEach((dbName, mvs) -> result.addAll(mvs.values()));
+ return unmodifiableList(result);
+ }
+
+ RelOptMaterialization get(String dbName, String viewName) {
+ if (materializedViews.get(dbName) != null) {
+ LOG.debug("Found materialized view {}.{} in registry", dbName, viewName);
+ return materializedViews.get(dbName).get(viewName);
+ }
+ LOG.debug("Materialized view {}.{} not found in registry", dbName,
viewName);
+ return null;
+ }
+
+ public List<RelOptMaterialization> get(String queryText) {
+ List<RelOptMaterialization> relOptMaterializationList =
sqlToMaterializedView.get(queryText.toLowerCase());
+ if (relOptMaterializationList == null) {
+ LOG.debug("No materialized view with query text '{}' found in registry",
queryText);
Review comment:
This will print the full query which will be very verbose. Maybe move
printing the query text to TRACE level and simply print `No materialized view
with similar query text found in registry` in DEBUG mode.
##########
File path:
ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
##########
@@ -1047,6 +1047,7 @@ PREHOOK: query: select
program
from tv_view_data
PREHOOK: type: QUERY
+PREHOOK: Input: arc_view@mv_tv_view_data_av2
Review comment:
Is this the new test that you had added to verify that the casting is
matching correctly now?
##########
File path: ql/src/test/queries/clientnegative/materialized_view_no_cbo_rewrite.q
##########
@@ -1,11 +0,0 @@
-set hive.support.concurrency=true;
-set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.strict.checks.cartesian.product=false;
-set hive.materializedview.rewriting=true;
-
-create table cmv_basetable (a int, b varchar(256), c decimal(10,2))
-stored as orc TBLPROPERTIES ('transactional'='true');
-
-insert into cmv_basetable values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2,
'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8);
-
-create materialized view cmv_mat_view as select a, b, c from cmv_basetable
sort by a;
Review comment:
Would this still fail for queries that do not go through CBO due to
missing support for some features/constructs? We do not have many of those
queries left but maybe we should add the negative test (if it is not already
there)
##########
File path:
ql/src/test/queries/clientpositive/materialized_view_create_rewrite_dummy.q
##########
@@ -6,6 +6,7 @@ set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.strict.checks.cartesian.product=false;
set hive.materializedview.rewriting=true;
+set hive.materializedview.rewriting.query.text=false;
Review comment:
Why is this disabled now? Does the test change?
##########
File path:
ql/src/test/queries/clientpositive/materialized_view_create_rewrite_multi_db.q
##########
@@ -3,6 +3,7 @@ set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.strict.checks.cartesian.product=false;
set hive.materializedview.rewriting=true;
+set hive.materializedview.rewriting.query.text=false;
Review comment:
Why is this disabled now? Does the test change?
##########
File path: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
##########
@@ -1844,6 +1844,9 @@ private static void populateLlapDaemonVarsSet(Set<String>
llapDaemonVarsSetLocal
// materialized views
HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING("hive.materializedview.rewriting",
true,
"Whether to try to rewrite queries using the materialized views
enabled for rewriting"),
+
HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_QUERY_TEXT("hive.materializedview.rewriting.query.text",
true,
Review comment:
Let's make it shorter: `hive.materializedview.rewriting.query.text` ->
`hive.materializedview.rewriting.text` or `hive.materializedview.rewriting.sql`
?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
##########
@@ -345,9 +345,19 @@
import javax.sql.DataSource;
+import static java.util.Collections.singletonList;
+import static
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils.extractTable;
+
public class CalcitePlanner extends SemanticAnalyzer {
+ /**
+ * {@link org.antlr.runtime.TokenRewriteStream} offers the opportunity of
multiple rewrites of the same
+ * input text (in our case the sql query text). These rewrites are called
programs and identified by a string.
+ * EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM is for identifying the program which
replaces all identifiers in the
+ * query with fully qualified identifiers.
+ */
+ public static final String EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM =
"EXPANDED_QUERY_PROGRAM";
Review comment:
could this be `private`?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewsCache.java
##########
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.metadata;
+
+import org.apache.calcite.plan.RelOptMaterialization;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.function.BiFunction;
+
+import static java.util.Collections.emptyList;
+import static java.util.Collections.unmodifiableList;
+
+/**
+ * Collection for storing {@link RelOptMaterialization}s.
+ * RelOptMaterialization can be lookup by
+ * - the Materialized View fully qualified name
+ * - query text.
+ * This implementation contains two {@link ConcurrentHashMap} one for name
based and one for query text based lookup.
+ * The map contents are synchronized during each dml operation: Dml operations
are performed initially on the map
+ * which provides name based lookup. The map which provides query text based
lookup is updated by lambda expressions
+ * passed to {@link ConcurrentHashMap#compute(Object, BiFunction)}.
+ */
+public class MaterializedViewsCache {
+ private static final Logger LOG =
LoggerFactory.getLogger(MaterializedViewsCache.class);
+
+ // Key is the database name. Value a map from the qualified name to the view
object.
+ private final ConcurrentMap<String, ConcurrentMap<String,
RelOptMaterialization>> materializedViews =
+ new ConcurrentHashMap<>();
+ // Map for looking up materialization by view query text
+ private final Map<String, List<RelOptMaterialization>> sqlToMaterializedView
= new ConcurrentHashMap<>();
+
+
+ public void putIfAbsent(Table materializedViewTable, RelOptMaterialization
materialization) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
ensureDbMap(materializedViewTable);
+
+ // You store the materialized view
+ dbMap.compute(materializedViewTable.getTableName(), (mvTableName,
relOptMaterialization) -> {
+ List<RelOptMaterialization> materializationList =
sqlToMaterializedView.computeIfAbsent(
+ materializedViewTable.getViewExpandedText().toLowerCase(), s ->
new ArrayList<>());
+ materializationList.add(materialization);
+ return materialization;
+ });
+
+ LOG.debug("Materialized view {}.{} added to registry",
+ materializedViewTable.getDbName(),
materializedViewTable.getTableName());
+ }
+
+ private ConcurrentMap<String, RelOptMaterialization> ensureDbMap(Table
materializedViewTable) {
+ // We are going to create the map for each view in the given database
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
+ new ConcurrentHashMap<String, RelOptMaterialization>();
+ // If we are caching the MV, we include it in the cache
+ final ConcurrentMap<String, RelOptMaterialization> prevDbMap =
materializedViews.putIfAbsent(
+ materializedViewTable.getDbName(), dbMap);
+ if (prevDbMap != null) {
+ dbMap = prevDbMap;
+ }
+ return dbMap;
+ }
+
+ public void refresh(
+ Table oldMaterializedViewTable, Table materializedViewTable,
RelOptMaterialization newMaterialization) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
ensureDbMap(materializedViewTable);
+
+ dbMap.compute(materializedViewTable.getTableName(), (mvTableName,
existingMaterialization) -> {
+ List<RelOptMaterialization> optMaterializationList =
sqlToMaterializedView.computeIfAbsent(
+ materializedViewTable.getViewExpandedText().toLowerCase(), s ->
new ArrayList<>());
+
+ if (existingMaterialization == null) {
+ // If it was not existing, we just create it
+ optMaterializationList.add(newMaterialization);
+ return newMaterialization;
+ }
+ Table existingMaterializedViewTable =
HiveMaterializedViewUtils.extractTable(existingMaterialization);
+ if (existingMaterializedViewTable.equals(oldMaterializedViewTable)) {
+ // If the old version is the same, we replace it
+ optMaterializationList.remove(existingMaterialization);
+ optMaterializationList.add(newMaterialization);
+ return newMaterialization;
+ }
+ // Otherwise, we return existing materialization
+ return existingMaterialization;
+ });
+
+ LOG.debug("Refreshed materialized view {}.{} -> {}.{}",
+ oldMaterializedViewTable.getDbName(),
oldMaterializedViewTable.getTableName(),
+ materializedViewTable.getDbName(),
materializedViewTable.getTableName());
+ }
+
+ public void remove(Table materializedViewTable) {
+ ConcurrentMap<String, RelOptMaterialization> dbMap =
materializedViews.get(materializedViewTable.getDbName());
+ if (dbMap != null) {
+ // Delete only if the create time for the input materialized view table
and the table
+ // in the map match. Otherwise, keep the one in the map.
+ dbMap.computeIfPresent(materializedViewTable.getTableName(),
(mvTableName, oldMaterialization) -> {
+ if
(HiveMaterializedViewUtils.extractTable(oldMaterialization).equals(materializedViewTable))
{
+ List<RelOptMaterialization> materializationList =
+
sqlToMaterializedView.get(materializedViewTable.getViewExpandedText().toLowerCase());
Review comment:
We should not call `toLowerCase`. For instance, it would transform
literal values that should be different and then consider them equal, e.g.,
col1='AaAa' vs col1='Aaaa'.
In addition to applying the change, can you add a test for this case to see
that rewriting is triggered vs not-triggered?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
##########
@@ -2349,6 +2362,50 @@ private RelNode
applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba
return basePlan;
}
+ private RelOptMaterialization
copyMaterializationToNewCluster(RelOptCluster optCluster, RelOptMaterialization
materialization) {
Review comment:
Can we move this to _HiveMaterializedViewsUtils.java_? I think there is
a single other place where we call _ copyNodeNewCluster_ and it's to create a
new materialization too. Maybe you can make _ copyNodeNewCluster_ private, make
_ copyMaterializationToNewCluster_ in _HiveMaterializedViewsUtils.java_, and
only call the latter.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
##########
@@ -1945,6 +1945,18 @@ public RelOptMaterialization
getMaterializedViewForRebuild(String dbName, String
}
}
+ public List<RelOptMaterialization> getMaterialization(
+ String queryString, List<String> tablesUsed, HiveTxnManager txnMgr)
throws HiveException {
+
+ List<RelOptMaterialization> materializedViews =
+
HiveMaterializedViewsRegistry.get().getRewritingMaterializedViews(queryString);
+ if (materializedViews.isEmpty()) {
+ return Collections.emptyList();
+ }
+
+ return filterAugmentMaterializedViews(materializedViews, tablesUsed,
txnMgr);
Review comment:
_filterAugmentMaterializedViews_ contains logic that is relevant to
incremental computation of MVs, as well as other logic that works specifically
for certain plan patterns. For instance, I am not sure we should be calling
_deriveGroupingSetsMaterializedViews_ for textual queries, e.g., whether it
could lead to incorrect results.
Instead, should we call `getValidMaterializedViews(materializedViews,
tablesUsed, false, false, txnMgr)`? The caveat is that you will also need to
create a method
```
private List<RelOptMaterialization> getValidMaterializedViews(List<Table>
materializedViewTables,
List<String> tablesUsed, boolean forceMVContentsUpToDate, *boolean
tryIncrementalRewriting*,
boolean expandGroupingSets, HiveTxnManager txnMgr) throws
HiveException {
```
For the former calls, you can pass the value of the property
`HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL)` for
`tryIncrementalRewriting`. For the new call from this method, you should pass
`false`. This will effectively lead to ignoring the MVs if they are outdated.
##########
File path:
ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_2.q.out
##########
@@ -0,0 +1,334 @@
+PREHOOK: query: create table cmv_basetable_n0 (a int, b varchar(256), c
decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_basetable_n0
+POSTHOOK: query: create table cmv_basetable_n0 (a int, b varchar(256), c
decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_basetable_n0
+PREHOOK: query: insert into cmv_basetable_n0 values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cmv_basetable_n0
+POSTHOOK: query: insert into cmv_basetable_n0 values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cmv_basetable_n0
+POSTHOOK: Lineage: cmv_basetable_n0.a SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_n0.b SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_n0.c SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_n0.d SCRIPT []
+PREHOOK: query: create materialized view cmv_mat_view_n0
+as select a, b, c from cmv_basetable_n0 where a = 2
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@cmv_basetable_n0
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_mat_view_n0
+POSTHOOK: query: create materialized view cmv_mat_view_n0
+as select a, b, c from cmv_basetable_n0 where a = 2
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@cmv_basetable_n0
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_mat_view_n0
+PREHOOK: query: select * from cmv_mat_view_n0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_mat_view_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select * from cmv_mat_view_n0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_mat_view_n0
+#### A masked pattern was here ####
+2 bob 3.14
+2 bonnie 172342.20
+PREHOOK: query: show tblproperties cmv_mat_view_n0
+PREHOOK: type: SHOW_TBLPROPERTIES
+POSTHOOK: query: show tblproperties cmv_mat_view_n0
+POSTHOOK: type: SHOW_TBLPROPERTIES
+COLUMN_STATS_ACCURATE
{"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","b":"true","c":"true"}}
+bucketing_version 2
+numFiles 1
+numFilesErasureCoded 0
+numRows 2
+rawDataSize 408
+totalSize 468
+#### A masked pattern was here ####
+PREHOOK: query: create materialized view if not exists cmv_mat_view2
+as select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@cmv_basetable_n0
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_mat_view2
+POSTHOOK: query: create materialized view if not exists cmv_mat_view2
+as select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@cmv_basetable_n0
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_mat_view2
+PREHOOK: query: select * from cmv_mat_view2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from cmv_mat_view2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+3 9.80
+3 978.76
+PREHOOK: query: show tblproperties cmv_mat_view2
+PREHOOK: type: SHOW_TBLPROPERTIES
+POSTHOOK: query: show tblproperties cmv_mat_view2
+POSTHOOK: type: SHOW_TBLPROPERTIES
+COLUMN_STATS_ACCURATE
{"BASIC_STATS":"true","COLUMN_STATS":{"a":"true","c":"true"}}
+bucketing_version 2
+numFiles 1
+numFilesErasureCoded 0
+numRows 2
+rawDataSize 232
+totalSize 334
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+PREHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+POSTHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: default.cmv_mat_view2
+ Select Operator
+ expressions: a (type: int), c (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+PREHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+POSTHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+3 9.80
+3 978.76
+PREHOOK: query: explain
+alter materialized view cmv_mat_view2 disable rewrite
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+PREHOOK: Input: default@cmv_mat_view2
+PREHOOK: Output: default@cmv_mat_view2
+POSTHOOK: query: explain
+alter materialized view cmv_mat_view2 disable rewrite
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+POSTHOOK: Input: default@cmv_mat_view2
+POSTHOOK: Output: default@cmv_mat_view2
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Alter Materialized View Rewrite
+ name: default.cmv_mat_view2
+ disable: true
+
+ Stage: Stage-1
+ Materialized View Update
+ name: default.cmv_mat_view2
+ disable rewrite: true
+
+PREHOOK: query: alter materialized view cmv_mat_view2 disable rewrite
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+PREHOOK: Input: default@cmv_mat_view2
+PREHOOK: Output: default@cmv_mat_view2
+POSTHOOK: query: alter materialized view cmv_mat_view2 disable rewrite
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+POSTHOOK: Input: default@cmv_mat_view2
+POSTHOOK: Output: default@cmv_mat_view2
+PREHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: cmv_basetable_n0
+ filterExpr: (a = 3) (type: boolean)
+ Filter Operator
+ predicate: (a = 3) (type: boolean)
+ Select Operator
+ expressions: 3 (type: int), c (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+3 9.80
+3 978.76
+PREHOOK: query: explain
+alter materialized view cmv_mat_view2 enable rewrite
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+PREHOOK: Input: default@cmv_mat_view2
+PREHOOK: Output: default@cmv_mat_view2
+POSTHOOK: query: explain
+alter materialized view cmv_mat_view2 enable rewrite
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+POSTHOOK: Input: default@cmv_mat_view2
+POSTHOOK: Output: default@cmv_mat_view2
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Alter Materialized View Rewrite
+ name: default.cmv_mat_view2
+ enable: true
+
+ Stage: Stage-1
+ Materialized View Update
+ name: default.cmv_mat_view2
+ retrieve and include: true
+
+PREHOOK: query: alter materialized view cmv_mat_view2 enable rewrite
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+PREHOOK: Input: default@cmv_mat_view2
+PREHOOK: Output: default@cmv_mat_view2
+POSTHOOK: query: alter materialized view cmv_mat_view2 enable rewrite
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE
+POSTHOOK: Input: default@cmv_mat_view2
+POSTHOOK: Output: default@cmv_mat_view2
+PREHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+PREHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+POSTHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: default.cmv_mat_view2
+ Select Operator
+ expressions: a (type: int), c (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+PREHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+POSTHOOK: Input: default@cmv_mat_view2
+#### A masked pattern was here ####
+3 9.80
+3 978.76
+PREHOOK: query: drop materialized view cmv_mat_view2
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@cmv_mat_view2
+PREHOOK: Output: default@cmv_mat_view2
+POSTHOOK: query: drop materialized view cmv_mat_view2
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@cmv_mat_view2
+POSTHOOK: Output: default@cmv_mat_view2
+PREHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: cmv_basetable_n0
+ filterExpr: (a = 3) (type: boolean)
+ Filter Operator
+ predicate: (a = 3) (type: boolean)
+ Select Operator
+ expressions: 3 (type: int), c (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select a, c from cmv_basetable_n0 where a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n0
+#### A masked pattern was here ####
+3 9.80
+3 978.76
+PREHOOK: query: drop materialized view cmv_mat_view_n0
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@cmv_mat_view_n0
+PREHOOK: Output: default@cmv_mat_view_n0
+POSTHOOK: query: drop materialized view cmv_mat_view_n0
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@cmv_mat_view_n0
+POSTHOOK: Output: default@cmv_mat_view_n0
Review comment:
Can we also add some tests with inserts on the based tables (i.e., the
MVs become outdated and are not used), then rebuild, then verify the MV is used
again? You can see different such scenarios in current `materialized_view*`
tests.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
##########
@@ -12565,15 +12566,17 @@ void analyzeInternal(ASTNode ast,
Supplier<PlannerContext> pcf) throws SemanticE
sinkOp = genOPTree(ast, plannerCtx);
boolean usesMasking = false;
Review comment:
Can you add a few masking tests that match / do not match based on exact
SQL text?
There are positive and negative tests for current rewriting (masking_mv in
both cases):
https://github.com/apache/hive/commit/873d31f33a061cd38be7de91b208987871fb612e#diff-3ae6a10ec619425775eeff0c135ace87cfe2c8dfbaea22f99278d7cd5d9bc330
##########
File path:
ql/src/test/org/apache/hadoop/hive/ql/metadata/TestMaterializedViewsCache.java
##########
@@ -0,0 +1,525 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.metadata;
+
+import org.apache.calcite.plan.Convention;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptQuery;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttle;
+import org.apache.calcite.rel.RelVisitor;
+import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.metadata.Metadata;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Litmus;
+import org.apache.calcite.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.singletonList;
+import static org.hamcrest.CoreMatchers.hasItem;
+import static org.hamcrest.CoreMatchers.not;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+
+/**
+ * Functional and parallel execution tests for {@link MaterializedViewsCache}.
+ * Parallel execution test is disabled by default.
+ */
+class TestMaterializedViewsCache {
Review comment:
Neat, thanks for adding these tests!!
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
##########
@@ -2349,6 +2362,50 @@ private RelNode
applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba
return basePlan;
}
+ private RelOptMaterialization
copyMaterializationToNewCluster(RelOptCluster optCluster, RelOptMaterialization
materialization) {
+ final RelNode viewScan = materialization.tableRel;
+ final RelNode newViewScan = HiveMaterializedViewUtils.copyNodeNewCluster(
+ optCluster, viewScan);
+ return new RelOptMaterialization(newViewScan, materialization.queryRel,
null,
+ materialization.qualifiedTableName);
+ }
+
+ private boolean isMaterializedViewRewritingByTextEnabled() {
+ return
conf.getBoolVar(ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_QUERY_TEXT)
&&
+ mvRebuildMode == MaterializationRebuildMode.NONE &&
+ !getQB().isMaterializedView() &&
!ctx.isLoadingMaterializedView() && !getQB().isCTAS() &&
+ getQB().getIsQuery() &&
+ getQB().hasTableDefined();
+ }
+
+ private RelNode applyMaterializedViewRewritingByText(RelNode
calciteGenPlan, RelOptCluster optCluster) {
+ unparseTranslator.applyTranslations(ctx.getTokenRewriteStream(),
EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM);
+ String expandedQueryText = ctx.getTokenRewriteStream()
+ .toString(EXPANDED_QUERY_TOKEN_REWRITE_PROGRAM,
ast.getTokenStartIndex(), ast.getTokenStopIndex());
+ try {
+ List<RelOptMaterialization> relOptMaterializationList =
db.getMaterialization(
+ expandedQueryText, getTablesUsed(calciteGenPlan), getTxnMgr());
+ for (RelOptMaterialization relOptMaterialization :
relOptMaterializationList) {
+ try {
+ Table hiveTableMD = extractTable(relOptMaterialization);
+ if (db.validateMaterializedViewsFromRegistry(
Review comment:
Another validation is needed here to confirm whether current user has
access to such MV. See
`HiveMaterializedViewUtils.checkPrivilegeForMaterializedViews`
(https://github.com/apache/hive/commit/cf4463e2d049ceaf931d21e2ae8182f50d56b43d)
Please, add a test such as the one included in the
https://github.com/apache/hive/commit/cf4463e2d049ceaf931d21e2ae8182f50d56b43d .
##########
File path: ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q
##########
@@ -5,6 +5,7 @@ set
hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.strict.checks.cartesian.product=false;
set hive.stats.fetch.column.stats=true;
set hive.materializedview.rewriting=true;
+set hive.materializedview.rewriting.query.text=false;
Review comment:
Why is this disabled now? Does the test change?
##########
File path:
ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_2.q
##########
@@ -0,0 +1,65 @@
+-- SORT_QUERY_RESULTS
+
+SET hive.vectorized.execution.enabled=false;
+set hive.server2.materializedviews.registry.impl=DUMMY;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting.query.text=true;
Review comment:
No need, this is `true` by default?
##########
File path: ql/src/test/results/clientpositive/llap/avrotblsjoin.q.out
##########
@@ -73,6 +73,7 @@ POSTHOOK: Lineage: table1_1.col1 SCRIPT []
POSTHOOK: Lineage: table1_1.col2 SCRIPT []
WARNING: Comparing a bigint and a string may result in a loss of precision.
WARNING: Comparing a bigint and a string may result in a loss of precision.
+WARNING: Comparing a bigint and a string may result in a loss of precision.
Review comment:
Is this expected? Same question for the same WARNING messages in q files
below (I assume root cause is the same).
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]