This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e16dbe5071c [test](case) add partition prune cases (#44722) e16dbe5071c is described below commit e16dbe5071cc47783a67ee252dccda4916ec61de Author: feiniaofeiafei <moail...@selectdb.com> AuthorDate: Tue Dec 3 22:13:24 2024 +0800 [test](case) add partition prune cases (#44722) --- .../partition_prune/auto_partition.groovy | 92 ++++++ .../partition_prune/hive_partition_prune.groovy | 136 +++++++++ .../int_as_date_partition_col.groovy | 203 +++++++++++++ .../multi_column_range_partition.groovy | 260 ++++++++++++++++ .../partition_prune/one_col_list_partition.groovy | 249 ++++++++++++++++ .../partition_prune/one_col_range_partition.groovy | 326 +++++++++++++++++++++ .../varchar_as_date_partition_col.groovy | 251 ++++++++++++++++ 7 files changed, 1517 insertions(+) diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/auto_partition.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/auto_partition.groovy new file mode 100644 index 00000000000..ee6a40c7a0d --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/auto_partition.groovy @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("auto_partition") { + sql "drop table if exists one_col_range_partition_date_func" + sql """create table one_col_range_partition_date_func (a int, dt datetime not null, d date, c varchar(100)) duplicate key(a) + auto partition by range(date_trunc(dt,'day')) () distributed by hash(a) properties("replication_num"="1");""" + sql """INSERT INTO one_col_range_partition_date_func SELECT number, + date_add('2020-01-01 00:00:00', interval number hour), + cast(date_add('2020-01-01 00:00:00', interval number hour) as date), cast(number as varchar(65533)) FROM numbers('number'='10000');""" + sql "drop table if exists one_col_range_partition_date_func_month" + sql """create table one_col_range_partition_date_func_month(a int, dt datetime not null, d date, c varchar(100)) duplicate key(a) + auto partition by range(date_trunc(dt,'month')) () distributed by hash(a) properties("replication_num"="1");""" + sql """INSERT INTO one_col_range_partition_date_func_month SELECT number, + date_add('2020-01-01 00:00:00', interval number hour), + cast(date_add('2020-01-01 00:00:00', interval number hour) as date), cast(number as varchar(65533)) FROM numbers('number'='10000');""" + // auto partition + // partition by range(date_trunc(dt, 'day')) + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt<'2020-10-01 00:00:00'" + contains("partitions=274/417") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt<='2020-10-01 00:00:00'" + contains("partitions=275/417") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt>'2020-10-01 00:00:00'" + contains("partitions=143/417") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt>='2020-10-01 00:00:00'" + contains("partitions=143/417") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt='2020-10-01 00:00:00'" + contains("partitions=1/417") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt<=>'2020-10-01 00:00:00'" + contains("partitions=1/417") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func WHERE dt!='2020-10-01 00:00:00'" + contains("partitions=417/417") + } + + // partition by range(date_trunc(dt, 'month')) + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt<'2020-10-01 00:00:00'" + contains("partitions=9/14") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt<='2020-10-01 00:00:00'" + contains("partitions=10/14") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt>'2020-10-01 00:00:00'" + contains("partitions=5/14") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt>='2020-10-01 00:00:00'" + contains("partitions=5/14") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt='2020-10-01 00:00:00'" + contains("partitions=1/14 (p20201001000000)") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt<=>'2020-10-01 00:00:00'" + contains("partitions=1/14 (p20201001000000)") + } + explain { + sql "SELECT * FROM one_col_range_partition_date_func_month WHERE dt!='2020-10-01 00:00:00'" + contains("partitions=14/14") + } +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/hive_partition_prune.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/hive_partition_prune.groovy new file mode 100644 index 00000000000..ebf4d35d3a1 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/hive_partition_prune.groovy @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("hive_partition_prune") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return; + } + + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get("hive3HmsPort") + String catalog_name = "test_external_catalog_hive_partition_prune" + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + sql """switch ${catalog_name};""" + sql "drop database if exists test_hive_partition_prune" + sql "create database test_hive_partition_prune" + sql "use test_hive_partition_prune" + sql "drop table if exists test_hive_partition " + sql """create table test_hive_partition (a int, b int, p int) + engine=hive partition by list(p)() + properties("file_format"="parquet","orc.compress"="zstd");""" + + sql "insert into test_hive_partition values(1,3,1),(1,2,2),(1,2,3),(1,2,4),(1,2,5),(1,6,null)" + + explain { + sql "select * from test_hive_partition where p=1 or p=2" + contains("partition=2/6") + } + + explain { + sql "SELECT * FROM test_hive_partition WHERE p in (1,2) and p in (2,3,4)" + contains("partition=1/6") + } + + explain { + sql "SELECT * FROM test_hive_partition WHERE (p=1 or p=2) and p in (3,5,2)" + contains("partition=1/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE (p=1 xor p=2)" + contains("partition=2/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p in (1, 5,6)" + contains("partition=2/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p not in (15,6,1, '2021-01-02 00:00:00')" + contains("partition=4/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p not in (1, 5,6,null)" + contains("partition=0/6") + } + explain { + sql "select * from test_hive_partition where !(p is not null)" + contains("partition=1/6") + } + explain { + sql "select * from test_hive_partition where p is null" + contains("partition=1/6") + } + explain { + sql "select * from test_hive_partition where p is not null" + contains("partition=5/6") + } + explain { + sql "select * from test_hive_partition where not p is null" + contains("partition=5/6") + } + explain { + sql "select * from test_hive_partition where !(p is null)" + contains("partition=5/6") + } + + explain { + sql "select * from test_hive_partition where p <=> null" + contains("partition=1/6") + } + explain { + sql "select * from test_hive_partition where !(p <=> null)" + contains("partition=5/6") + } + + explain { + sql "SELECT * FROM test_hive_partition WHERE p<1" + contains("partition=0/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p<=2" + contains("partition=2/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p>2" + contains("partition=3/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p>=3" + contains("partition=3/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p=4" + contains("partition=1/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p<=>5" + contains("partition=1/6") + } + explain { + sql "SELECT * FROM test_hive_partition WHERE p!=5 and p!=6" + contains("partition=4/6") + } + sql "drop table if exists test_hive_partition " +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy new file mode 100644 index 00000000000..5e65e6445ef --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("int_as_date_partition_col") { + sql "drop table if exists partition_int" + sql """CREATE TABLE partition_int(a int, dt int) PARTITION BY range(dt) ( + partition p20240101 values less than ("20240101"), + partition p20240201 values less than ("20240201"), + partition p20240301 values less than ("20240301"), + partition p20240401 values less than ("20240401"), + partition p20240501 values less than ("20240501"), + partition p20240601 values less than ("20240601") + ) + distributed BY hash(a) + properties("replication_num"="1");""" + + sql """insert into partition_int + SELECT number,cast(date_add('2023-11-05', interval number day) as int) FROM numbers('number'='150');""" + sql "drop table if exists partition_int_has_null" + sql """ + CREATE TABLE partition_int_has_null(a int, dt int) PARTITION BY range(dt) ( + partition p20240101 values less than ("20240101"), + partition p20240201 values less than ("20240201"), + partition p20240301 values less than ("20240301"), + partition p20240401 values less than ("20240401"), + partition p20240501 values less than ("20240501"), + partition p20240601 values less than ("20240601") + ) + distributed BY hash(a) properties("replication_num"="1");""" + + sql """insert into partition_int_has_null + SELECT number,cast(date_add('2023-11-05', interval number day) as int) FROM numbers('number'='120');""" + + explain { + sql "SELECT * FROM partition_int WHERE dt<'20240501' and dt>'20240301'" + contains("partitions=2/6 (p20240401,p20240501)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt<'20240701' or dt>='20241001'" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql "SELECT * FROM partition_int WHERE (dt<'20240301' or dt>='20241001') or dt<=>'20240905'" + contains("partitions=3/6 (p20240101,p20240201,p20240301)") + } + explain { + sql "SELECT * FROM partition_int WHERE (dt<'20240301' and dt>='20240201') or dt<=>'20240905'" + contains("partitions=1/6 (p20240301)") + } + + explain { + sql "SELECT * FROM partition_int WHERE (dt<'20240701' xor dt>='20240601')" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql """SELECT count(*) FROM partition_int WHERE + (date_trunc(dt,'month')<'2024-8-01' and date_trunc(dt,'month')>'2024-6-01' )""" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql """SELECT count(*) FROM partition_int WHERE date_trunc(dt,'month')<'2024-07-01' ;""" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql """SELECT count(*) FROM partition_int WHERE + !(date_trunc(dt,'month')<'2024-8-01' and date_trunc(dt,'month')>'2024-6-01' );""" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql """ + SELECT count(*) FROM partition_int WHERE + !(dt<'20241001' or date_trunc(dt,'month')<'2024-7-01' and date_trunc(dt,'month')>'2024-6-01' ) + """ + contains("VEMPTYSET") + } + + explain { + sql "SELECT * FROM partition_int WHERE !('2020-09-01' not like '%2020-10-01 00:00:00%')" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + explain { + sql "SELECT * FROM partition_int WHERE !('2020-09-01' not in ('2020-10-01','2020-09-01'))" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + explain { + sql "SELECT * FROM partition_int WHERE !('2020-09-01' in ('2020-10-01','2020-09-01'))" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM partition_int WHERE !('2020-10-02'>'2020-10-01')" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM partition_int WHERE !(dt>='20240301' and a<100) " + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501") + } + explain { + sql "SELECT * FROM partition_int WHERE !(a<100 )" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501") + } + + explain { + sql "SELECT * FROM partition_int WHERE !(dt>='20240301' or a<100 )" + contains("partitions=3/6 (p20240101,p20240201,p20240301)") + } + + explain { + sql "SELECT * FROM partition_int WHERE !(dt>='20240301' or (a<100 and dt<'20240801'))" + contains("partitions=3/6 (p20240101,p20240201,p20240301)") + } + + explain { + sql "SELECT * FROM partition_int WHERE dt regexp '2024-10-01'" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql "SELECT * FROM partition_int WHERE dt like '20241001'" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt in ('20240301', '20240302')" + contains("partitions=1/6 (p20240401)") + } + + explain { + sql "SELECT * FROM partition_int WHERE dt not in ('20240201', '20240202')" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } + + explain { + sql "select * from partition_int_has_null where dt is null" + contains("partitions=1/6 (p20240101)") + } + explain { + sql "select * from partition_int_has_null where dt is not null" + contains("partitions=4/6 (p20240101,p20240201,p20240301,p20240401)") + } + explain { + sql "select * from partition_int_has_null where not dt is null" + contains("partitions=4/6 (p20240101,p20240201,p20240301,p20240401)") + } + explain { + sql "select * from partition_int_has_null where !(dt is null)" + contains("partitions=4/6 (p20240101,p20240201,p20240301,p20240401)") + } + explain { + sql "select * from partition_int_has_null where dt <=> null" + contains("partitions=1/6 (p20240101)") + } + explain { + sql "select * from partition_int_has_null where !(dt <=> null)" + contains("partitions=4/6 (p20240101,p20240201,p20240301,p20240401)") + } + + explain { + sql "SELECT * FROM partition_int WHERE dt<'20240301'" + contains("partitions=3/6 (p20240101,p20240201,p20240301)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt<='20240301'" + contains("partitions=4/6 (p20240101,p20240201,p20240301,p20240401)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt>'20240301'" + contains("partitions=2/6 (p20240401,p20240501)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt>='20240301'" + contains("partitions=2/6 (p20240401,p20240501)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt='20240301'" + contains("partitions=1/6 (p20240401)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt<=>'20240302'" + contains("partitions=1/6 (p20240401)") + } + explain { + sql "SELECT * FROM partition_int WHERE dt!='20241001'" + contains("partitions=5/6 (p20240101,p20240201,p20240301,p20240401,p20240501)") + } +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/multi_column_range_partition.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/multi_column_range_partition.groovy new file mode 100644 index 00000000000..78da2304542 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/multi_column_range_partition.groovy @@ -0,0 +1,260 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("multi_column_range_partition") { + sql "drop table if exists t_multi_column_range_partition" + sql """ + create table t_multi_column_range_partition(a int, dt datetime, v int) partition by range(a, dt) + ( + partition p0 values [(0,'2024-01-01 00:00:00'), (10,'2024-01-10 00:00:00')), + partition p10 values [(10,'2024-01-10 00:00:00'), (20,'2024-01-20 00:00:00')), + partition p20 values [(20,'2024-01-20 00:00:00'), (30,'2024-01-31 00:00:00')), + partition p30 values [(30,'2024-01-31 00:00:00'), (40,'2024-02-10 00:00:00')), + partition p40 values [(40,'2024-02-10 00:00:00'), (50,'2024-02-20 00:00:00')) + ) + distributed by hash(a) properties("replication_num"="1"); """ + + sql """insert into t_multi_column_range_partition values(0,'2024-01-01 00:00:00',2),(1,'2024-01-01 00:00:00',2),(1,'2025-01-01 00:00:00',2), + (10,'2024-01-10 00:00:00',3),(10,'2024-01-11 00:00:00',200),(12,'2021-01-01 00:00:00',2), + (25,'2024-01-10 00:00:00',3),(20,'2024-01-11 00:00:00',200),(30,'2021-01-01 00:00:00',2), + (40,'2024-01-01 00:00:00',2),(40,'2024-01-31 00:00:00',2),(10,'2024-01-9 00:00:00',1000),(10,'2024-01-10 00:00:00',1000),(10,'2024-01-10 01:00:00',1000), + (2,'2023-01-10 01:00:00',1000) + """ + + sql "drop table if exists t_multi_column_partition_datetime_first" + sql """ + create table t_multi_column_partition_datetime_first(a int, dt datetime, v int not null) partition by range(dt,a) + ( + partition p0 values [('2024-01-01 00:00:00',0), ('2024-01-10 00:00:00',10)), + partition p10 values [('2024-01-10 00:00:00',10), ('2024-01-20 00:00:00',20)), + partition p20 values [('2024-01-20 00:00:00',20), ('2024-01-31 00:00:00',30)), + partition p30 values [('2024-01-31 00:00:00',30), ('2024-02-10 00:00:00',40)), + partition p40 values [('2024-02-10 00:00:00',40), ('2024-02-20 00:00:00',50)) + ) + distributed by hash(a) properties("replication_num"="1");""" + + sql "insert into t_multi_column_partition_datetime_first values(0,'2024-01-01 00:00:00',2)" + sql "insert into t_multi_column_partition_datetime_first values(1,'2024-01-01 00:00:00',2)" + sql "insert into t_multi_column_partition_datetime_first values(10,'2024-01-10 00:00:00',3),(10,'2024-01-11 00:00:00',200)" + sql "insert into t_multi_column_partition_datetime_first values(25,'2024-01-10 00:00:00',3),(20,'2024-01-11 00:00:00',200)" + sql "insert into t_multi_column_partition_datetime_first values(40,'2024-01-01 00:00:00',2),(40,'2024-01-31 00:00:00',2)" + sql "insert into t_multi_column_partition_datetime_first values(10,'2024-01-9 00:00:00',1000),(10,'2024-01-10 00:00:00',1000),(10,'2024-01-10 01:00:00',1000)" + + explain { + sql "select * from t_multi_column_range_partition where a=10 and date_trunc(dt, 'day') <'2024-01-10'" + contains("partitions=1/5 (p0)") + } + sql "set PARTITION_PRUNING_EXPAND_THRESHOLD=5;" + explain { + sql "select * from t_multi_column_range_partition where a=10 and date_trunc(dt, 'day') <'2024-01-10'" + contains("partitions=2/5 (p0,p10)") + } + for (int i = 0; i < 2; ++i) { + if (i == 1) { + sql "set PARTITION_PRUNING_EXPAND_THRESHOLD=5;" + } else if (i == 0) { + sql "set PARTITION_PRUNING_EXPAND_THRESHOLD=30;" + } + explain { + sql "select * from t_multi_column_range_partition where a=10 and date_trunc(dt, 'day') ='2024-01-10'" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "select * from t_multi_column_range_partition where a<19 and (date_trunc(dt, 'day') <'2024-01-20' OR date_trunc(dt, 'day') >'2024-02-10')" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "select * from t_multi_column_range_partition where a>1 and a<30 and (date_trunc(dt, 'day') <'2024-01-20' OR date_trunc(dt, 'day') >'2024-02-10')" + contains("partitions=3/5 (p0,p10,p20)") + } + explain { + sql "select * from t_multi_column_range_partition where a>1 and a<30 and (date_trunc(dt, 'day') >'2024-01-20' and date_trunc(dt, 'day') <'2024-02-10')" + contains("partitions=3/5 (p0,p10,p20)") + } + + explain { + sql "select * from t_multi_column_range_partition where !(a > 1 and a < 30 and(date_trunc(dt, 'day') > '2024-01-20' and date_trunc(dt, 'day') < '2024-02-10') )" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + explain { + sql "select * from t_multi_column_range_partition where !(a >= 10 or date_trunc(dt, 'day') < '2024-01-10' )" + contains("partitions=1/5 (p0)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a>1 and date_trunc(dt, 'day') regexp '2020-10-01 00:00:00'" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a>1 and !date_trunc(dt, 'day') regexp '2020-10-01 00:00:00'" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a=1 and !date_trunc(dt, 'day') regexp '2020-10-01 00:00:00'" + contains("partitions=1/5 (p0)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a!=1 and !date_trunc(dt, 'day') like '2020-10-01 00:00:00'" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a in (7,8) and date_trunc(dt, 'day') in ('2024-01-01', '2024-10-02')" + contains("partitions=1/5 (p0)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a not in (7,8) and date_trunc(dt, 'day') not in ('2024-01-01', '2024-10-02')" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a in (7,8) and date_trunc(dt, 'day') not in ('2024-01-01', '2024-10-02')" + contains("partitions=1/5 (p0)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE a in (7,8,15) and date_trunc(dt, 'day') <'2024-01-20'" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE (a in (7,8,15) or a>=31) and date_trunc(dt, 'day') >'2024-01-20'" + contains("partitions=3/5 (p0,p10,p30)") + } + + explain { + sql "select * from t_multi_column_partition_datetime_first where dt is null" + contains("VEMPTYSET") + } + explain { + sql "select * from t_multi_column_partition_datetime_first where date_trunc(dt,'day') is null" + contains("partitions=3/5 (p0,p10,p30)") + } + explain { + sql "select * from t_multi_column_partition_datetime_first where date_trunc(dt,'day') is not null" + contains("partitions=3/5 (p0,p10,p30)") + } + explain { + sql "select * from t_multi_column_partition_datetime_first where a=1 and date_trunc(dt,'month') is null" + contains("partitions=1/5 (p30)") + } + explain { + sql "select * from t_multi_column_partition_datetime_first where a>1 and date_trunc(dt,'month') is null" + contains("partitions=1/5 (p30)") + } + explain { + sql "select * from t_multi_column_partition_datetime_first where date_trunc(dt,'month') is null" + contains("partitions=1/5 (p30)") + } + explain { + sql "select * from t_multi_column_partition_datetime_first where a=1 and date_trunc(dt,'month') is not null" + contains("partitions=3/5 (p0,p10,p30)") + } + + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')<'2024-1-19 00:00:00'" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')<='2024-1-10 00:00:00'" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')>'2024-1-10 00:00:00'" + contains("partitions=2/5 (p10,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')>'2024-1-11 00:00:00'" + contains("partitions=2/5 (p10,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')>='2024-1-10 00:00:00'" + contains("partitions=3/5 (p0,p10,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')='2024-1-2 00:00:00'" + contains("partitions=1/5 (p0)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')='2020-10-01 00:00:00'" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')<=>'2020-10-01 00:00:00'" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')!='2020-10-01 00:00:00'" + contains("partitions=3/5 (p0,p10,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')<'2024-1-19 00:00:00' and a>10" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')<='2024-1-10 00:00:00' and a=10" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')>'2024-1-10 00:00:00' and a<2" + contains("partitions=2/5 (p10,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')='2024-1-10 00:00:00' and a >100" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE date_trunc(dt,'day')='2024-1-1 00:00:00' and a <0" + contains("partitions=1/5 (p0)") + } + + + explain { + sql "SELECT * FROM t_multi_column_partition_datetime_first WHERE coalesce(date_trunc(dt,'day'), null, '2020-01-01') <'2021-1-07 00:00:00'" + contains("partitions=3/5 (p0,p10,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE coalesce(date_trunc(dt,'day') <'2021-1-07 00:00:00' , true, false) and a=9" + contains("partitions=1/5 (p0)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE if(date_trunc(dt,'day') <'2021-1-07 00:00:00', true, false) and a<20" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE if(a>1, dt<'2001-1-01 00:00:00', dt<'2001-1-01 00:00:00')" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + explain { + sql "SELECT * FROM t_multi_column_range_partition WHERE if(dt<'2021-1-01 00:00:00', dt<'2001-1-01 00:00:00', dt>'2031-1-01 00:00:00')" + contains("partitions=4/5 (p0,p10,p20,p30)") + } + + explain { + sql """SELECT * FROM t_multi_column_range_partition WHERE case when dt<'2024-1-10 00:00:00' then false when dt<'2024-1-20 00:00:00' then false + else true end and a=10;""" + contains("partitions=2/5 (p0,p10)") + } + + explain { + sql """SELECT * FROM t_multi_column_range_partition WHERE case when date_trunc(dt,'day')<'2024-1-10 00:00:00' then false when date_trunc(dt,'day')<'2024-1-20 00:00:00' then true + else true end and a<20;""" + contains("partitions=2/5 (p0,p10)") + } + explain { + sql """SELECT * FROM t_multi_column_range_partition WHERE case when date_trunc(dt,'day')<'2024-1-10 00:00:00' then dt + else '2023-01-01 00:00:00' end <'2021-01-06 00:00:00' and a<20;""" + contains("partitions=2/5 (p0,p10)") + } + } +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/one_col_list_partition.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/one_col_list_partition.groovy new file mode 100644 index 00000000000..4de512376d1 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/one_col_list_partition.groovy @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("one_col_list_partition") { + sql "drop table if exists one_col_list_partition_date" + sql """create table one_col_list_partition_date(a int, dt datetime, d date, c varchar(100)) duplicate key(a) + partition by list(dt) + ( + partition p1 values in ('2021-01-01 00:00:00', '2021-01-02 00:00:00'), + partition p2 values in ('2021-01-03 00:00:00', '2021-01-04 00:00:00'), + partition p3 values in ('2021-01-05 00:00:00', '2021-01-06 00:00:00'), + partition p4 values in ('2021-01-07 00:00:00', '2021-01-08 00:00:00'), + partition p5 values in ('2021-01-09 00:00:00', '2021-01-10 00:00:00'), + partition p6 values in ('2021-01-11 00:00:00', '2021-01-12 00:00:00'), + partition p7 values in ('2021-01-13 00:00:00', '2021-01-14 00:00:00'), + partition p8 values in ('2021-01-15 00:00:00', '2021-01-16 00:00:00'), + partition p9 values in ('2021-01-17 00:00:00', '2021-01-18 00:00:00') + ) + distributed by hash(a) + properties("replication_num"="1");""" + + sql """ + insert into one_col_list_partition_date values(1,'2021-01-01 00:00:00',null, 'abc'),(1,'2021-01-02 00:00:00',null, 'abc') + ,(1,'2021-01-03 00:00:00',null, 'abc'),(1,'2021-01-04 00:00:00',null, 'abc') + ,(1,'2021-01-05 00:00:00',null, 'abc'),(1,'2021-01-06 00:00:00',null, 'abc'),(1,'2021-01-07 00:00:00',null, 'abc'),(1,'2021-01-08 00:00:00',null, 'abc') + ,(1,'2021-01-09 00:00:00',null, 'abc'),(1,'2021-01-11 00:00:00',null, 'abc'),(1,'2021-01-13 00:00:00',null, 'abc'),(1,'2021-01-15 00:00:00',null, 'abc') + ,(1,'2021-01-10 00:00:00',null, 'abc'),(1,'2021-01-12 00:00:00',null, 'abc'),(1,'2021-01-14 00:00:00',null, 'abc'),(1,'2021-01-16 00:00:00',null, 'abc') + """ + + sql "drop table if exists one_col_list_partition_date_has_null" + sql""" + CREATE TABLE `one_col_list_partition_date_has_null` ( + `a` int NULL, + `dt` datetime NULL, + `d` date NULL, + `c` varchar(100) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + PARTITION BY LIST (`dt`) + (PARTITION p10 VALUES IN (NULL), + PARTITION p1 VALUES IN ('2021-01-01 00:00:00','2021-01-02 00:00:00'), + PARTITION p2 VALUES IN ('2021-01-03 00:00:00','2021-01-04 00:00:00'), + PARTITION p3 VALUES IN ('2021-01-05 00:00:00','2021-01-06 00:00:00'), + PARTITION p4 VALUES IN ('2021-01-07 00:00:00','2021-01-08 00:00:00'), + PARTITION p5 VALUES IN ('2021-01-09 00:00:00','2021-01-10 00:00:00'), + PARTITION p6 VALUES IN ('2021-01-11 00:00:00','2021-01-12 00:00:00'), + PARTITION p7 VALUES IN ('2021-01-13 00:00:00','2021-01-14 00:00:00'), + PARTITION p8 VALUES IN ('2021-01-15 00:00:00','2021-01-16 00:00:00'), + PARTITION p9 VALUES IN ('2021-01-17 00:00:00','2021-01-18 00:00:00')) + DISTRIBUTED BY HASH(`a`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1") + """ + + sql """ + insert into one_col_list_partition_date_has_null values(1,'2021-01-01 00:00:00',null, 'abc'),(1,'2021-01-02 00:00:00',null, 'abc') + ,(1,'2021-01-03 00:00:00',null, 'abc'),(1,'2021-01-04 00:00:00',null, 'abc') + ,(1,'2021-01-05 00:00:00',null, 'abc'),(1,'2021-01-06 00:00:00',null, 'abc'),(1,'2021-01-07 00:00:00',null, 'abc'),(1,'2021-01-08 00:00:00',null, 'abc') + ,(1,'2021-01-09 00:00:00',null, 'abc'),(1,'2021-01-11 00:00:00',null, 'abc'),(1,'2021-01-13 00:00:00',null, 'abc'),(1,'2021-01-15 00:00:00',null, 'abc') + ,(1,'2021-01-10 00:00:00',null, 'abc'),(1,'2021-01-12 00:00:00',null, 'abc'),(1,'2021-01-14 00:00:00',null, 'abc'),(1,'2021-01-16 00:00:00',null, 'abc') + ,(1,null,null,'bdb') + """ + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt<'2021-01-10 00:00:00' and dt>'2021-01-7 00:00:00'" + contains("partitions=2/9 (p4,p5)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt<'2021-01-10 00:00:00' or dt>'2021-01-14 00:00:00'" + contains("partitions=6/9 (p1,p2,p3,p4,p5,p8)") + } + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE (dt<'2021-01-10 00:00:00' or dt>'2021-01-10 00:00:00') and dt<=>'2021-01-01 00:00:00'" + contains("partitions=1/9 (p1)") + } + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE (dt<'2021-01-10 00:00:00' xor dt>'2021-01-10 00:00:00')" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + + } + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt regexp '2020-10-01 00:00:00'" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt like '%2020-10-01 00:00:00'" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt like '2020-10-01 00:00:00'" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt in ('2021-01-10 00:00:00', '2021-01-13 00:00:00')" + contains("partitions=2/9 (p5,p7)") + + } + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt not in ('2021-01-01 00:00:00', '2021-01-02 00:00:00')" + contains("partitions=7/9 (p2,p3,p4,p5,p6,p7,p8)") + + } + + explain { + sql "select * from one_col_list_partition_date where dt is null" + contains("VEMPTYSET") + } + explain { + sql "select * from one_col_list_partition_date where dt is not null" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "select * from one_col_list_partition_date where not dt is null" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "select * from one_col_list_partition_date where !(dt is null)" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + + explain { + sql "select * from one_col_list_partition_date where dt <=> null" + contains("VEMPTYSET") + } + explain { + sql "select * from one_col_list_partition_date where !(dt <=> null)" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "select * from one_col_list_partition_date_has_null where dt is null" + contains("partitions=1/10 (p10)") + } + explain { + sql "select * from one_col_list_partition_date_has_null where dt is not null" + contains("partitions=8/10 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "select * from one_col_list_partition_date_has_null where not dt is null" + contains("partitions=8/10 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "select * from one_col_list_partition_date_has_null where !(dt is null)" + contains("partitions=8/10 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + + explain { + sql "select * from one_col_list_partition_date_has_null where dt <=> null" + contains("partitions=1/10 (p10)") + } + explain { + sql "select * from one_col_list_partition_date_has_null where !(dt <=> null)" + contains("partitions=8/10 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + + + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt<'2021-01-01 00:00:00'" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt<='2021-01-02 00:00:00'" + contains("partitions=1/9 (p1)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt>'2021-1-03 00:00:00'" + contains("partitions=7/9 (p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt>='2021-1-04 00:00:00'" + contains("partitions=7/9 (p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt='2021-1-05 00:00:00'" + contains("partitions=1/9 (p3)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt<=>'2021-1-01 00:00:00'" + contains("partitions=1/9 (p1)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE dt!='2021-1-01 00:00:00' and dt!='2021-1-02 00:00:00'" + contains("partitions=7/9 (p2,p3,p4,p5,p6,p7,p8)") + } + + + // has condition function + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE coalesce(dt, null, '2020-01-01') <'2021-1-07 00:00:00'" + contains("partitions=3/9 (p1,p2,p3)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE coalesce(dt <'2021-1-07 00:00:00' , true, false)" + contains("partitions=3/9 (p1,p2,p3)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE coalesce(dt >'2021-1-07 00:00:00' , true, false)" + contains("partitions=5/9 (p4,p5,p6,p7,p8)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE coalesce(null ,dt< '2021-1-07 00:00:00', dt> '2020-10-01 00:00:00' )" + contains("partitions=3/9 (p1,p2,p3)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE if(dt<'2021-1-01 00:00:00', true, false)" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE if(a>1, dt<'2001-1-01 00:00:00', dt<'2001-1-01 00:00:00')" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql "SELECT * FROM one_col_list_partition_date WHERE if(dt<'2021-1-01 00:00:00', dt<'2001-1-01 00:00:00', dt>'2031-1-01 00:00:00')" + contains("VEMPTYSET") + } + explain { + sql """SELECT * FROM one_col_list_partition_date WHERE case when dt<'2021-1-01 00:00:00' then false when dt<'2021-5-01' then false + else true end;""" + contains("VEMPTYSET") + } + explain { + sql """SELECT * FROM one_col_list_partition_date WHERE case when dt<'2021-1-01 00:00:00' then false when dt<'2021-5-01' then true + else true end;""" + contains("partitions=8/9 (p1,p2,p3,p4,p5,p6,p7,p8)") + } + explain { + sql """SELECT * FROM one_col_list_partition_date WHERE case when dt<'2022-01-01 00:00:00' then dt + else '2023-01-01 00:00:00' end <'2021-01-06 00:00:00' ;""" + contains("partitions=3/9 (p1,p2,p3)") + } +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy new file mode 100644 index 00000000000..e8b7621e6d7 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy @@ -0,0 +1,326 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("one_col_range_partition") { + sql "drop table if exists one_col_range_partition_date" + sql """ + create table one_col_range_partition_date(a int, dt datetime, d date, c varchar(100)) duplicate key(a) + partition by range(dt) + ( + from ("2020-01-01") to ("2024-12-31") interval 1 day + ) + distributed by hash(a) + properties("replication_num"="1"); + """ + sql """ + INSERT INTO one_col_range_partition_date SELECT number, + date_add('2020-01-01 00:00:00', interval number hour), + cast(date_add('2020-01-01 00:00:00', interval number hour) as date), cast(number as varchar(65533)) FROM numbers('number'='10000'); + """ + + sql "drop table if exists one_col_range_partition_date_has_null" + sql """create table one_col_range_partition_date_has_null (a int, dt datetime, d date, c varchar(100)) duplicate key(a) + partition by range(dt) ( + partition p1 values less than ("2017-01-01"), + partition p2 values less than ("2018-01-01"), + partition p3 values less than ("2019-01-01"), + partition p4 values less than ("2020-01-01"), + partition p5 values less than ("2021-01-01") + )distributed by hash(a) + properties("replication_num"="1");""" + sql """INSERT INTO one_col_range_partition_date_has_null SELECT number, + date_add('2017-01-01 00:00:00', interval number day), + cast(date_add('2022-01-01 00:00:00', interval number day) as date), cast(number as varchar(65533)) FROM numbers('number'='1000');""" + sql """INSERT INTO one_col_range_partition_date_has_null values(3,null,null,null);""" + + + + // or and + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt<'2020-10-01 00:00:00' and dt>'2020-9-01 00:00:00'" + contains("partitions=30/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt<'2020-10-01 00:00:00' or dt>'2020-9-01 00:00:00'" + contains("partitions=417/1826") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE (dt<'2020-10-01 00:00:00' or dt>'2020-9-01 00:00:00') and dt<=>'2020-9-5 00:00:00'" + contains("partitions=1/1826 (p_20200905)") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE (dt<'2020-10-01 00:00:00' xor dt>'2020-9-01 00:00:00')" + contains("partitions=417/1826") + } + + explain { + sql """SELECT count(*) FROM one_col_range_partition_date WHERE + !(dt<'2020-10-01 00:00:00' and dt>'2020-9-01 00:00:00' or date_trunc(dt,'month')<'2020-7-01' and date_trunc(dt,'month')>'2020-6-01' )""" + contains("partitions=388/1826") + } + + explain { + sql """SELECT count(*) FROM one_col_range_partition_date WHERE + (dt>='2020-10-01 00:00:00' or dt<='2020-9-01 00:00:00' ) and (date_trunc(dt,'month')>='2020-9-01' or date_trunc(dt,'month')<='2020-6-01' )""" + contains("partitions=327/1826") + } + + // ! + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !('2020-09-01' not like '%2020-10-01 00:00:00%')" + contains("partitions=417/1826 ") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !('2020-09-01' not in ('2020-10-01','2020-09-01'))" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !('2020-09-01' in ('2020-10-01','2020-09-01'))" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !('2020-10-02'>'2020-10-01')" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !(dt>='2020-10-01 00:00:00' and a<100)" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !(a<100 and c>10)" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !(a<100 or c>10)" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !(dt>='2020-10-01 00:00:00' or a<100 )" + contains("partitions=274/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE !(dt<'2020-2-01 1:00:00' or (a<100 and dt>='2020-10-01 12:00:00'))" + contains("partitions=386/1826") + } + + // like regexp + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt regexp '2020-10-01 00:00:00'" + contains("partitions=417/1826") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt like '2020-10-01 00:00:00'" + contains("partitions=417/1826") + } + + // in + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt in ('2020-10-01 00:10:00', '2020-10-02 00:00:00')" + contains("partitions=2/1826") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt not in ('2020-10-01 00:10:00', '2020-10-02 00:00:00')" + contains("partitions=417/1826") + } + + // is null + //partition key has null value + explain { + sql "select * from one_col_range_partition_date_has_null where dt is null" + contains("partitions=1/5 (p1)") + } + explain { + sql "select * from one_col_range_partition_date_has_null where dt is not null" + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql "select * from one_col_range_partition_date_has_null where not dt is null" + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql "select * from one_col_range_partition_date_has_null where !(dt is null)" + contains("partitions=4/5 (p1,p2,p3,p4)") + } + + explain { + sql "select * from one_col_range_partition_date_has_null where dt <=> null" + contains("partitions=1/5 (p1)") + } + explain { + sql "select * from one_col_range_partition_date_has_null where !(dt <=> null)" + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql "select * from one_col_range_partition_date where dt is null" + contains("VEMPTYSET") + } + explain { + sql "select * from one_col_range_partition_date where dt is not null" + contains("partitions=417/1826") + } + explain { + sql "select * from one_col_range_partition_date where not dt is null" + contains("partitions=417/1826") + } + explain { + sql "select * from one_col_range_partition_date where !(dt is null)" + contains("partitions=417/1826") + } + + explain { + sql "select * from one_col_range_partition_date where dt <=> null" + contains("VEMPTYSET") + } + explain { + sql "select * from one_col_range_partition_date where !(dt <=> null)" + contains("partitions=417/1826") + } + + // op + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt<'2020-10-01 00:00:00'" + contains("partitions=274/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt<='2020-10-01 00:00:00'" + contains("partitions=275/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt>'2020-10-01 00:00:00'" + contains("partitions=143/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt>='2020-10-01 00:00:00'" + contains("partitions=143/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt='2020-10-01 00:00:00'" + contains("partitions=1/1826 (p_20201001)") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt<=>'2020-10-01 00:00:00'" + contains("partitions=1/1826 (p_20201001)") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt!='2020-10-01 00:00:00'" + contains("partitions=417/1826") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE coalesce(dt, null, '2020-01-01') <'2020-10-01 00:00:00'" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE coalesce(dt <'2020-2-01 00:00:00' , true, false)" + contains("partitions=31/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE coalesce(dt >'2020-2-01 00:00:00' , true, false)" + contains("partitions=386/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE coalesce(null ,dt< '2001-1-01 00:00:00', dt> '2020-10-01 00:00:00' )" + contains("partitions=143/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE if(dt<'2001-1-01 00:00:00', true, false)" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE if(a>1, dt<'2001-1-01 00:00:00', dt>'2001-1-01 00:00:00')" + contains("partitions=417/1826 ") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE if(dt<'2021-1-01 00:00:00', dt<'2001-1-01 00:00:00', dt>'2001-1-01 00:00:00')" + contains("partitions=417/1826") + } + explain { + sql """SELECT * FROM one_col_range_partition_date WHERE case when dt<'2021-1-01 00:00:00' then false when dt<'2021-5-01' then false + else true end;""" + contains("partitions=417/1826") + } + explain { + sql """SELECT * FROM one_col_range_partition_date WHERE case when dt<'2022-1-01 00:00:00' then dt + else '2023-1-01 00:00:00' end <'2021-1-01 00:00:00' ;""" + contains("partitions=417/1826") + } + + // predicates has non partition column + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE '2020-09-01' not like '%2020-10-01 00:00:00%'" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE '2020-09-01' not in ('2020-10-01','2020-09-01')" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE '2020-09-01' in ('2020-10-01','2020-09-01')" + contains("partitions=417/1826") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE '2020-10-02'>'2020-10-01'" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt>='2020-10-01 00:00:00' and a<100" + contains("partitions=143/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE a<100 and c>10" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE a<100 or c>10" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt>='2020-10-01 00:00:00' or a<100" + contains("partitions=417/1826") + } + + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE not (dt>='2020-10-01 00:00:00' and a<100)" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt='2020-10-01 00:00:00' or a<100" + contains("partitions=417/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE not(dt!='2020-10-01 00:00:00' or a<100)" + contains("partitions=1/1826 (p_20201001)") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt<'2020-1-01 1:00:00' or a<100 and dt>='2020-10-01 12:00:00'" + contains("partitions=144/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt>='2020-10-01 00:00:00' and dt=d" + contains("partitions=143/1826") + } + explain { + sql "SELECT * FROM one_col_range_partition_date WHERE dt>='2020-10-01 00:00:00' or dt=d" + contains("partitions=417/1826") + } + +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy new file mode 100644 index 00000000000..0cfe4963f27 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("varchar_as_date_partition_col") { + sql"""drop table if exists partition_varchar;""" + sql """CREATE TABLE partition_varchar(a int, dt varchar(10), rdt datetime) PARTITION BY list(dt) ( + partition p20240101 values in ("20240101","20240102"), + partition p20240103 values in ("20240103","20240104"), + partition p20240105 values in ("20240105","20240106"), + partition p20240107 values in ("20240107","20240108"), + partition p20240109 values in ("20240109","20240110"), + partition p20240111 values in ("20240111","20240112"), + partition p20240113 values in ("20240113","20240114"), + partition p20240115 values in ("20240115","20240116"), + partition p20240117 values in ("20240117","20240118") + ) + distributed BY hash(a) + properties( + "replication_num"="1" + );""" + sql """ + insert into partition_varchar + SELECT number, DATE_FORMAT(date_add('2024-01-01', interval number day),"yyyyMMdd"),date_add('2024-01-01', interval number day) + FROM numbers('number'='10');""" + + sql "drop table if exists partition_varchar_has_null" + sql """CREATE TABLE partition_varchar_has_null(a int, dt varchar(10), rdt datetime) PARTITION BY list(dt) ( + partition p20240101 values in ("20240101","20240102",null), + partition p20240103 values in ("20240103","20240104"), + partition p20240105 values in ("20240105","20240106"), + partition p20240107 values in ("20240107","20240108"), + partition p20240109 values in ("20240109","20240110"), + partition p20240111 values in ("20240111","20240112"), + partition p20240113 values in ("20240113","20240114"), + partition p20240115 values in ("20240115","20240116"), + partition p20240117 values in ("20240117","20240118") + ) + distributed BY hash(a) + properties( + "replication_num"="1" + );""" + + sql """insert into partition_varchar_has_null + SELECT number, DATE_FORMAT(date_add('2024-01-01', interval number day),"yyyyMMdd"),date_add('2024-01-01', interval number day) + FROM numbers('number'='10');""" + sql """insert into partition_varchar_has_null values(11,null,null)""" + + + explain { + sql "SELECT * FROM partition_varchar WHERE dt<'20240110' and dt>'20240104'" + contains("partitions=3/9 (p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt<'20240104' or dt>'20240110'" + contains("partitions=2/9 (p20240101,p20240103)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt<date_format(now(),'yyyyMMdd') or dt > date_format(cast(now() as date) - interval 12 month,'yyyyMMdd')" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE date_trunc(dt,'day')< '20240105'" + contains("partitions=2/9 (p20240101,p20240103)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE date_trunc(dt,'day')< date_format(cast(now() as date) - interval 10 month,'yyyyMMdd')" + contains(" partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE (date_trunc(dt,'day')<'20240105' or dt>='20240108') or dt<=>'20240110';" + contains("partitions=4/9 (p20240101,p20240103,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE (dt<'20240107' and dt>='20240104') or dt<=>'20240109'" + contains("partitions=3/9 (p20240103,p20240105,p20240109)") + } + explain { + sql "SELECT count(*) FROM partition_varchar WHERE (date_trunc(dt,'day')<'2024-1-03' and date_trunc(dt,'day')>='2024-1-01' )" + contains("partitions=1/9 (p20240101)") + } + + explain { + sql """ + SELECT count(*) FROM partition_varchar WHERE + !(date_trunc(dt,'day')<'2024-1-03' and date_trunc(dt,'day')>='2024-1-01' ) + """ + contains("partitions=4/9 (p20240103,p20240105,p20240107,p20240109)") + } + + + explain { + sql "SELECT * FROM partition_varchar WHERE !('2020-09-01' not like '%2020-10-01 00:00:00%')" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE !('2020-09-01' not in ('2020-10-01','2020-09-01'))" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE !('2020-09-01' in ('2020-10-01','2020-09-01'))" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM partition_varchar WHERE !('2020-10-02'>'2020-10-01')" + contains("VEMPTYSET") + } + explain { + sql "SELECT * FROM partition_varchar WHERE !(dt>='20241001' and a<100)" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE !(a<100 )" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + + explain { + sql "SELECT * FROM partition_varchar WHERE !(dt>='20240102' or a<100 )" + contains("partitions=1/9 (p20240101)") + } + + explain { + sql "SELECT * FROM partition_varchar WHERE !(dt>='20240102' or (a<100 and dt<'20240801'))" + contains("partitions=1/9 (p20240101)") + } + + explain { + sql "SELECT * FROM partition_varchar WHERE dt regexp '2024-10-01'" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + + explain { + sql "SELECT * FROM partition_varchar WHERE dt like '%20240101'" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt like '20240101'" + contains("partitions=1/9 (p20240101)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt in ('20240101', '20241002')" + contains("partitions=1/9 (p20240101)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt not in ('20241001', '20241002')" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + + //partition key has null value + explain { + sql "select * from partition_varchar_has_null where dt is null" + contains("partitions=1/9 (p20240101)") + } + + explain { + sql "select * from partition_varchar_has_null where dt is not null" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + + explain { + sql "select * from partition_varchar_has_null where not dt is null" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "select * from partition_varchar_has_null where !(dt is null)" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "select * from partition_varchar_has_null where dt <=> null" + contains("partitions=1/9 (p20240101)") + } + explain { + sql "select * from partition_varchar_has_null where !(dt <=> null)" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + //partition key has no null value + explain { + sql "select * from partition_varchar where dt is null" + contains("VEMPTYSET") + } + explain { + sql "select * from partition_varchar where dt is not null" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "select * from partition_varchar where not dt is null" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "select * from partition_varchar where !(dt is null)" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "select * from partition_varchar where dt <=> null" + contains("VEMPTYSET") + } + explain { + sql "select * from partition_varchar where !(dt <=> null)" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "select * from partition_varchar where date_trunc(dt,'month') is null" + contains("VEMPTYSET") + } + explain { + sql "select * from partition_varchar where date_trunc(dt,'month') is not null" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt<'20240103'" + contains("partitions=1/9 (p20240101)") + } + + explain { + sql "SELECT * FROM partition_varchar WHERE dt<='20240103'" + contains("partitions=2/9 (p20240101,p20240103)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt>'20240103'" + contains("partitions=4/9 (p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt>='20240103'" + contains("partitions=4/9 (p20240103,p20240105,p20240107,p20240109)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt='20240103'" + contains("partitions=1/9 (p20240103)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt<=>'20240103'" + contains("partitions=1/9 (p20240103)") + } + explain { + sql "SELECT * FROM partition_varchar WHERE dt!='20240103'" + contains("partitions=5/9 (p20240101,p20240103,p20240105,p20240107,p20240109)") + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org