This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 08c9e0518d8 [fix](mtmv) Fix rewrite by materialized view fail when
query hive table (#38909)
08c9e0518d8 is described below
commit 08c9e0518d8f00f2d6faa5ab6d354deaab483fa8
Author: seawinde <[email protected]>
AuthorDate: Fri Aug 9 17:09:29 2024 +0800
[fix](mtmv) Fix rewrite by materialized view fail when query hive table
(#38909)
mv def is
select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate
from ${hive_catalog_name}.${hive_database}.${hive_table}
left join ${internal_catalog}.${olap_db}.${olap_table} on
l_orderkey = o_orderkey
if we query the sql as following, it will rewrite fail by mv, the fail
info is `mv can not offer any partition for query`
select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate
from ${hive_catalog_name}.${hive_database}.${hive_table}
left join ${internal_catalog}.${olap_db}.${olap_table} on
l_orderkey = o_orderkey
This pr fix this problem. it would be rewritten by mv successfully.
---
.../mv/AbstractMaterializedViewRule.java | 4 +
.../nereids/rules/exploration/mv/StructInfo.java | 12 +-
.../mv/external_table/part_partition_invalid.out | 70 +++++
.../external_table/part_partition_invalid.groovy | 284 +++++++++++++++++++++
4 files changed, 365 insertions(+), 5 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java
index b34faaf1250..857fd0e51b9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java
@@ -431,6 +431,10 @@ public abstract class AbstractMaterializedViewRule
implements ExplorationRuleFac
Map<BaseTableInfo, Set<Partition>> queryUsedBaseTablePartitions = new
LinkedHashMap<>();
queryUsedBaseTablePartitions.put(relatedPartitionTable, new
HashSet<>());
queryPlan.accept(new StructInfo.QueryScanPartitionsCollector(),
queryUsedBaseTablePartitions);
+ // Bail out, not check invalid partition if not olap scan, support
later
+ if (queryUsedBaseTablePartitions.isEmpty()) {
+ return Pair.of(ImmutableMap.of(), ImmutableMap.of());
+ }
Set<String> queryUsedBaseTablePartitionNameSet =
queryUsedBaseTablePartitions.get(relatedPartitionTable)
.stream()
.map(Partition::getName)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java
index eeb21925653..fa29d4d0e12 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java
@@ -67,7 +67,6 @@ import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.BitSet;
-import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
@@ -740,14 +739,17 @@ public class StructInfo {
if (!targetTablePartitionMap.containsKey(relatedPartitionTable)) {
return catalogRelation;
}
- // todo Support other type partition table
if (catalogRelation instanceof LogicalOlapScan) {
+ // Handle olap table
LogicalOlapScan logicalOlapScan = (LogicalOlapScan)
catalogRelation;
+ Set<Partition> tablePartitions =
targetTablePartitionMap.get(relatedPartitionTable);
for (Long partitionId :
logicalOlapScan.getSelectedPartitionIds()) {
- Set<Partition> partitions =
targetTablePartitionMap.computeIfAbsent(relatedPartitionTable,
- key -> new HashSet<>());
-
partitions.add(logicalOlapScan.getTable().getPartition(partitionId));
+
tablePartitions.add(logicalOlapScan.getTable().getPartition(partitionId));
}
+ } else {
+ // todo Support other type partition table
+ // Not support to partition check now when query external
catalog table, support later.
+ targetTablePartitionMap.clear();
}
return catalogRelation;
}
diff --git
a/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out
b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out
new file mode 100644
index 00000000000..b2cd5c559ca
--- /dev/null
+++
b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out
@@ -0,0 +1,70 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !query_sql --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+
+-- !query_mv_directly --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_modify_data_without_refresh_catalog --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_modify_and_without_refresh_catalog_19 --
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_modify_and_without_refresh_catalog_18 --
+2 2 2 2023-10-18 2023-10-18
+
+-- !after_modify_data_and_refresh_catalog --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_modify_and_refresh_catalog_19 --
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_modify_and_refresh_catalog_18 --
+2 2 2 2023-10-18 2023-10-18
+
+-- !after_modify_data_and_refresh_catalog_and_mv --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_add_data_without_refresh_catalog --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_add_and_without_refresh_catalog_19 --
+3 2 3 2023-10-19 2023-10-19
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_add_and_without_refresh_catalog_20 --
+
+-- !after_add_data_with_refresh_catalog --
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_add_and_refresh_catalog_19 --
+3 2 3 2023-10-19 2023-10-19
+3 2 3 2023-10-19 2023-10-19
+
+-- !after_add_and_refresh_catalog_20 --
+
+-- !after_add_data_and_refresh_catalog_and_mv --
+\N \N 7 \N 2023-10-20
+1 2 1 2023-10-17 2023-10-17
+2 2 2 2023-10-18 2023-10-18
+3 2 3 2023-10-19 2023-10-19
+3 2 3 2023-10-19 2023-10-19
+
diff --git
a/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy
b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy
new file mode 100644
index 00000000000..d2e6f18824d
--- /dev/null
+++
b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy
@@ -0,0 +1,284 @@
+package mv.external_table
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("part_partition_invalid", "p0,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test. then doesn't test mv rewrite")
+ return;
+ }
+ // prepare catalog
+ def suite_name = "part_partition_invalid";
+ def externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ def hms_port = context.config.otherConfigs.get("hive2HmsPort")
+ def hive_catalog_name = "${suite_name}_catalog"
+ def hive_database = "${suite_name}_db"
+ def hive_table = "${suite_name}_orders"
+
+ sql """drop catalog if exists ${hive_catalog_name}"""
+ sql """
+ create catalog if not exists ${hive_catalog_name} properties (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+ );"""
+
+ sql """switch ${hive_catalog_name};"""
+ sql """drop table if exists
${hive_catalog_name}.${hive_database}.${hive_table}"""
+ sql """ drop database if exists ${hive_database}"""
+ sql """ create database ${hive_database}"""
+ sql """use ${hive_database}"""
+ sql """
+ CREATE TABLE IF NOT EXISTS ${hive_table} (
+ o_orderkey integer,
+ o_custkey integer,
+ o_orderstatus char(1),
+ o_totalprice decimalv3(15,2),
+ o_orderpriority char(15),
+ o_clerk char(15),
+ o_shippriority integer,
+ o_comment varchar(79),
+ o_orderdate date
+ ) ENGINE=hive
+ PARTITION BY list(o_orderdate)()
+ PROPERTIES (
+ "replication_num" = "1",
+ "file_format"="orc",
+ "compression"="zlib"
+ );
+ """
+
+ sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table}
values(1, 1, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-17');"""
+ sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table}
values(2, 2, 'ok', 109.2, 'c','d',2, 'mm', '2023-10-18');"""
+ sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table}
values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');"""
+
+ // prepare table and data in olap
+ def internal_catalog = "internal"
+ def olap_db = context.config.getDbNameByFile(context.file)
+ def olap_table = "${suite_name}_lineitem"
+
+ sql """switch ${internal_catalog};"""
+ sql "use ${olap_db};"
+ sql "SET enable_nereids_planner=true;"
+ sql "set runtime_filter_mode=OFF";
+ sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject';"
+
+ sql """
+ drop table if exists ${olap_table}
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${olap_table} (
+ l_orderkey integer not null,
+ l_partkey integer not null,
+ l_suppkey integer not null,
+ l_linenumber integer not null,
+ l_quantity decimalv3(15,2) not null,
+ l_extendedprice decimalv3(15,2) not null,
+ l_discount decimalv3(15,2) not null,
+ l_tax decimalv3(15,2) not null,
+ l_returnflag char(1) not null,
+ l_linestatus char(1) not null,
+ l_shipdate date not null,
+ l_commitdate date not null,
+ l_receiptdate date not null,
+ l_shipinstruct char(25) not null,
+ l_shipmode char(10) not null,
+ l_comment varchar(44) not null
+ )
+ DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber)
+ PARTITION BY RANGE(l_shipdate)
+ (FROM ('2023-10-01') TO ('2023-10-30') INTERVAL 1 DAY)
+ DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+
+ sql """
+ insert into ${olap_table} values
+ (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17',
'2023-10-17', 'a', 'b', 'yyyyyyyyy'),
+ (2, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18',
'2023-10-18', 'a', 'b', 'yyyyyyyyy'),
+ (3, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19',
'2023-10-19', 'c', 'd', 'xxxxxxxxx');
+ """
+
+ def query_sql = """
+ select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate
+ from ${hive_catalog_name}.${hive_database}.${hive_table}
+ left join ${internal_catalog}.${olap_db}.${olap_table} on
l_orderkey = o_orderkey
+ """
+ order_qt_query_sql """${query_sql}"""
+
+ // create partition mtmv, related partition is hive catalog
+ def mv_name = 'mv_join'
+ sql """drop materialized view if exists ${mv_name}"""
+ sql """
+ CREATE MATERIALIZED VIEW ${mv_name}
+ BUILD IMMEDIATE REFRESH AUTO ON MANUAL
+ partition by(`o_orderdate`)
+ DISTRIBUTED BY RANDOM BUCKETS 2
+ PROPERTIES ('replication_num' = '1', 'grace_period' = '0')
+ AS ${query_sql}
+ """
+
+ sql """REFRESH MATERIALIZED VIEW ${mv_name} complete"""
+ waitingMTMVTaskFinished(getJobName(olap_db, mv_name))
+ order_qt_query_mv_directly """select * from ${mv_name};"""
+
+ // test query rewrite by mv, should fail ,because
materialized_view_rewrite_enable_contain_external_table
+ // is false default
+ explain {
+ sql(""" ${query_sql}""")
+ notContains("${mv_name}(${mv_name})")
+ }
+ sql "SET materialized_view_rewrite_enable_contain_external_table=true"
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+
+ // data change in external table doesn't influence query rewrite,
+ // if want to use new data in external table should be refresh manually
+ sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table}
values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');"""
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_data_without_refresh_catalog """ ${query_sql}"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-19';
+ """)
+ // query invalid partition data, should hit mv, because not check now.
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_and_without_refresh_catalog_19 """ ${query_sql}
where o_orderdate = '2023-10-19';"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-18';
+ """)
+ // query valid partition data, should hit mv
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_and_without_refresh_catalog_18 """ ${query_sql}
where o_orderdate = '2023-10-18';"""
+
+ // refresh catalog cache
+ sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" =
"true"); """
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_data_and_refresh_catalog """ ${query_sql}"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-19';
+ """)
+ // query invalid partition data, should hit mv, because not check now.
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_and_refresh_catalog_19 """ ${query_sql} where
o_orderdate = '2023-10-19';"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-18';
+ """)
+ // query valid partition data, should hit mv
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_and_refresh_catalog_18 """ ${query_sql} where
o_orderdate = '2023-10-18';"""
+
+ // refresh manually
+ sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" =
"true"); """
+ sql """REFRESH MATERIALIZED VIEW ${mv_name} auto"""
+ waitingMTMVTaskFinished(getJobName(olap_db, mv_name))
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_modify_data_and_refresh_catalog_and_mv """ ${query_sql}"""
+
+ // test after hive add partition
+ sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table}
values(6, 7, 'ok', 29.5, 'x', 'y', 6, 'ss', '2023-10-20');"""
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_data_without_refresh_catalog """ ${query_sql}"""
+
+ explain {
+ sql("""
+ ${query_sql}
+ """)
+ // query invalid partition data, should hit mv, because not check now.
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_and_without_refresh_catalog_19 """ ${query_sql} where
o_orderdate = '2023-10-19';"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-20';
+ """)
+ // query valid partition data, should hit mv
+ notContains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_and_without_refresh_catalog_20 """ ${query_sql} where
o_orderdate = '2023-10-20';"""
+
+ // refresh catalog cache
+ sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" =
"true"); """
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_data_with_refresh_catalog """ ${query_sql}"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-19';
+ """)
+ // query invalid partition data, should hit mv, because not check now.
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_and_refresh_catalog_19 """ ${query_sql} where
o_orderdate = '2023-10-19';"""
+
+ explain {
+ sql("""
+ ${query_sql} where o_orderdate = '2023-10-20';
+ """)
+ // query valid partition data, should hit mv
+ notContains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_and_refresh_catalog_20 """ ${query_sql} where
o_orderdate = '2023-10-20';"""
+
+ // refresh manually
+ sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" =
"true"); """
+ sql """REFRESH MATERIALIZED VIEW ${mv_name} auto"""
+ waitingMTMVTaskFinished(getJobName(olap_db, mv_name))
+ explain {
+ sql(""" ${query_sql}""")
+ contains("${mv_name}(${mv_name})")
+ }
+ order_qt_after_add_data_and_refresh_catalog_and_mv """ ${query_sql}"""
+
+ sql """drop table if exists
${hive_catalog_name}.${hive_database}.${hive_table}"""
+ sql """drop table if exists ${internal_catalog}.${olap_db}.${olap_table}"""
+ sql """drop database if exists ${hive_catalog_name}.${hive_database}"""
+ sql """drop materialized view if exists
${internal_catalog}.${olap_db}.${mv_name};"""
+ sql """drop catalog if exists ${hive_catalog_name}"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]