This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4cb97dacca7 HIVE-27291: Constant reduction in CBO does not work for
UNIX_TIMESTAMP (Krisztian Kasa, reviewed by Stamatis Zampetakis)
4cb97dacca7 is described below
commit 4cb97dacca7d908a9de1f5dd64492d00efcb180d
Author: Krisztian Kasa <[email protected]>
AuthorDate: Fri Apr 11 15:37:53 2025 +0200
HIVE-27291: Constant reduction in CBO does not work for UNIX_TIMESTAMP
(Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
.../reloperators/HiveUnixTimestampSqlOperator.java | 8 +-
.../cbo_join_transitive_pred_loop_1.q | 3 +
.../clientpositive/materialized_view_rewrite_12.q | 18 +++
.../materialized_view_rewrite_by_text_10.q | 4 +
.../clientpositive/partition_unix_timestamp.q | 9 ++
.../cbo_filter_proj_transpose_noinputref.q.out | 18 ++-
.../llap/cbo_join_transitive_pred_loop_1.q.out | 7 +-
.../llap/materialized_view_rewrite_12.q.out | 133 +++++++++++++++++++++
.../materialized_view_rewrite_by_text_10.q.out | 2 +-
.../llap/partition_unix_timestamp.q.out | 29 +++++
10 files changed, 208 insertions(+), 23 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java
index 04bb8b05f21..4ebf2a6027f 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java
@@ -29,13 +29,9 @@
*/
public class HiveUnixTimestampSqlOperator {
public static final SqlFunction INSTANCE =
- new SqlFunction("UNIX_TIMESTAMP", SqlKind.OTHER_FUNCTION,
ReturnTypes.BIGINT, null,
+ new HiveSqlFunction("UNIX_TIMESTAMP", SqlKind.OTHER_FUNCTION,
ReturnTypes.BIGINT, null,
OperandTypes.or(OperandTypes.NILADIC,
OperandTypes.or(OperandTypes.STRING, OperandTypes.TIMESTAMP,
OperandTypes.DATE),
- OperandTypes.STRING_STRING), SqlFunctionCategory.NUMERIC) {
- @Override
- public boolean isDynamicFunction() {
- return true;
- }
+ OperandTypes.STRING_STRING), SqlFunctionCategory.NUMERIC, true,
true) {
};
}
diff --git
a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
index d9986318f11..64ac7049df0 100644
--- a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
@@ -1,5 +1,8 @@
set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+-- HiveProject(month=[CAST(202503):INTEGER])
+--! qt:replace:/(.*month=\[CAST\()\d+(\).*)/$1#Masked#$2/
+
CREATE TABLE test1 (act_nbr string);
CREATE TABLE test2 (month int);
CREATE TABLE test3 (mth int, con_usd double);
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_12.q
b/ql/src/test/queries/clientpositive/materialized_view_rewrite_12.q
new file mode 100644
index 00000000000..3aaf410f97a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_12.q
@@ -0,0 +1,18 @@
+-- Materialized view with runtime constant function can not be used in
automatic query rewrites
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.materializedview.rewriting.sql=false;
+
+create table t1(a int, week_end_year int) stored as orc TBLPROPERTIES
('transactional'='true');
+
+create materialized view mat1 as
+SELECT a, week_end_year FROM t1 WHERE week_end_year = year(from_unixtime(
unix_timestamp() ));
+
+create materialized view mat2 as
+SELECT a, week_end_year FROM t1 WHERE week_end_year = year(
current_timestamp() );
+
+describe formatted mat1;
+describe formatted mat2;
+
+show materialized views;
diff --git
a/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q
b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q
index 43deec4a814..0a8061562f7 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q
@@ -1,4 +1,8 @@
-- Materialzed view definition has non-deterministic function
+
+-- HiveFilter(condition=[<=(CAST($1):BIGINT, 1744204765)])
+--! qt:replace:/(.*,\s)\d+(\)\])/$1#Masked#$2/
+
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
diff --git a/ql/src/test/queries/clientpositive/partition_unix_timestamp.q
b/ql/src/test/queries/clientpositive/partition_unix_timestamp.q
new file mode 100644
index 00000000000..8e50de62371
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partition_unix_timestamp.q
@@ -0,0 +1,9 @@
+-- HiveFilter(condition=[=(CAST($1):DOUBLE, 2.0250E3)])
+--! qt:replace:/(.*,\s)[0-9.E]+(\)\])/$1#Masked#$2/
+
+create table t1 (a int) partitioned by (p_year string);
+
+explain cbo
+select * from t1 where p_year IN (
+ year(from_unixtime( unix_timestamp() ))
+ );
diff --git
a/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out
b/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out
index 587f95c03a5..c3b9a11cc60 100644
---
a/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out
+++
b/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out
@@ -18,9 +18,7 @@ unix_timestamp(void) is deprecated. Use current_timestamp
instead.
unix_timestamp(void) is deprecated. Use current_timestamp instead.
unix_timestamp(void) is deprecated. Use current_timestamp instead.
unix_timestamp(void) is deprecated. Use current_timestamp instead.
-unix_timestamp(void) is deprecated. Use current_timestamp instead.
-unix_timestamp(void) is deprecated. Use current_timestamp instead.
-unix_timestamp(void) is deprecated. Use current_timestamp instead.
+Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN CBO SELECT c.m
FROM (
SELECT substr(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), 1, 1) as m
@@ -42,13 +40,11 @@ POSTHOOK: Input: default@test1
POSTHOOK: Input: default@test2
#### A masked pattern was here ####
CBO PLAN:
-HiveProject(m=[$0])
- HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none],
cost=[not available])
- HiveProject(m=[substr(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)])
- HiveFilter(condition=[=(substr(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1),
_UTF-16LE'2')])
- HiveProject(DUMMY=[0])
- HiveTableScan(table=[[default, test1]], table:alias=[test1])
- HiveProject($f0=[substr(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)])
- HiveFilter(condition=[AND(=($0, substr(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)),
=(substr(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1),
_UTF-16LE'2'))])
+HiveProject(m=[CAST(_UTF-16LE'2':VARCHAR(2147483647) CHARACTER SET
"UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"])
+ HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not
available])
+ HiveProject(DUMMY=[0])
+ HiveTableScan(table=[[default, test1]], table:alias=[test1])
+ HiveProject(m=[CAST(_UTF-16LE'2':VARCHAR(2147483647) CHARACTER SET
"UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"])
+ HiveFilter(condition=[=(_UTF-16LE'2', $0)])
HiveTableScan(table=[[default, test2]], table:alias=[d])
diff --git
a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
index 9ea53f75cb6..dae047df095 100644
---
a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
+++
b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
@@ -24,7 +24,6 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@test3
unix_timestamp(void) is deprecated. Use current_timestamp instead.
unix_timestamp(void) is deprecated. Use current_timestamp instead.
-unix_timestamp(void) is deprecated. Use current_timestamp instead.
PREHOOK: query: EXPLAIN CBO
SELECT c.month,
d.con_usd
@@ -62,10 +61,8 @@ HiveProject(month=[$0], con_usd=[$2])
HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none],
cost=[not available])
HiveProject(month=[$0])
HiveUnion(all=[true])
-
HiveProject(month=[CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1,
7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER])
- HiveFilter(condition=[IS NOT
NULL(CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP(),
_UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1,
7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE",
_UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER)])
- HiveProject(DUMMY=[0])
- HiveTableScan(table=[[default, test1]], table:alias=[test1])
+ HiveProject(month=[CAST(#Masked#):INTEGER])
+ HiveTableScan(table=[[default, test1]], table:alias=[test1])
HiveProject($f0=[CAST(202110):INTEGER])
HiveFilter(condition=[=($0, 202110)])
HiveTableScan(table=[[default, test2]], table:alias=[test2])
diff --git
a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_12.q.out
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_12.q.out
new file mode 100644
index 00000000000..21e097972f5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_12.q.out
@@ -0,0 +1,133 @@
+PREHOOK: query: create table t1(a int, week_end_year int) stored as orc
TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(a int, week_end_year int) stored as orc
TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+Cannot enable automatic rewriting for materialized view. UNIX_TIMESTAMP is not
a deterministic function
+PREHOOK: query: create materialized view mat1 as
+SELECT a, week_end_year FROM t1 WHERE week_end_year = year(from_unixtime(
unix_timestamp() ))
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+POSTHOOK: query: create materialized view mat1 as
+SELECT a, week_end_year FROM t1 WHERE week_end_year = year(from_unixtime(
unix_timestamp() ))
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:int,
comment:null), ]
+POSTHOOK: Lineage: mat1.week_end_year SIMPLE []
+Cannot enable automatic rewriting for materialized view. current_timestamp is
not a deterministic function
+PREHOOK: query: create materialized view mat2 as
+SELECT a, week_end_year FROM t1 WHERE week_end_year = year(
current_timestamp() )
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat2
+POSTHOOK: query: create materialized view mat2 as
+SELECT a, week_end_year FROM t1 WHERE week_end_year = year(
current_timestamp() )
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat2
+POSTHOOK: Lineage: mat2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:int,
comment:null), ]
+POSTHOOK: Lineage: mat2.week_end_year SIMPLE []
+PREHOOK: query: describe formatted mat1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@mat1
+POSTHOOK: query: describe formatted mat1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@mat1
+# col_name data_type comment
+a int
+week_end_year int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MATERIALIZED_VIEW
+Table Parameters:
+ COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"week_end_year\":\"true\"}}
+ bucketing_version 2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize #Masked#
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+
+# Materialized View Information
+Original Query: SELECT a, week_end_year FROM t1 WHERE week_end_year =
year(from_unixtime( unix_timestamp() ))
+Expanded Query: SELECT `t1`.`a`, `t1`.`week_end_year` FROM
`default`.`t1` WHERE `t1`.`week_end_year` = year(from_unixtime(
unix_timestamp() ))
+Rewrite Enabled: No
+Outdated for Rewriting: No
+
+# Materialized View Source table information
+Table name Snapshot
+default.t1 :0:9223372036854775807::
+PREHOOK: query: describe formatted mat2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@mat2
+POSTHOOK: query: describe formatted mat2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@mat2
+# col_name data_type comment
+a int
+week_end_year int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MATERIALIZED_VIEW
+Table Parameters:
+ COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"week_end_year\":\"true\"}}
+ bucketing_version 2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize #Masked#
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+
+# Materialized View Information
+Original Query: SELECT a, week_end_year FROM t1 WHERE week_end_year =
year( current_timestamp() )
+Expanded Query: SELECT `t1`.`a`, `t1`.`week_end_year` FROM
`default`.`t1` WHERE `t1`.`week_end_year` = year( current_timestamp() )
+Rewrite Enabled: No
+Outdated for Rewriting: No
+
+# Materialized View Source table information
+Table name Snapshot
+default.t1 :0:9223372036854775807::
+PREHOOK: query: show materialized views
+PREHOOK: type: SHOWMATERIALIZEDVIEWS
+POSTHOOK: query: show materialized views
+POSTHOOK: type: SHOWMATERIALIZEDVIEWS
+# MV Name Rewriting Enabled Mode
Incremental rebuild
+mat1 No Manual refresh Not
available
+mat2 No Manual refresh Not
available
diff --git
a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out
index 3455a1fe9ee..090eb840880 100644
---
a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out
+++
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out
@@ -35,6 +35,6 @@ POSTHOOK: Input: default@emps
#### A masked pattern was here ####
CBO PLAN:
HiveProject(ename=[$0], birth_epoch_secs=[$1])
- HiveFilter(condition=[<=(CAST($1):BIGINT, UNIX_TIMESTAMP())])
+ HiveFilter(condition=[<=(CAST($1):BIGINT, #Masked#)])
HiveTableScan(table=[[default, emps]], table:alias=[emps])
diff --git
a/ql/src/test/results/clientpositive/llap/partition_unix_timestamp.q.out
b/ql/src/test/results/clientpositive/llap/partition_unix_timestamp.q.out
new file mode 100644
index 00000000000..80d046e8772
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/partition_unix_timestamp.q.out
@@ -0,0 +1,29 @@
+PREHOOK: query: create table t1 (a int) partitioned by (p_year string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) partitioned by (p_year string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+PREHOOK: query: explain cbo
+select * from t1 where p_year IN (
+ year(from_unixtime( unix_timestamp() ))
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select * from t1 where p_year IN (
+ year(from_unixtime( unix_timestamp() ))
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], p_year=[$1])
+ HiveFilter(condition=[=(CAST($1):DOUBLE, #Masked#)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+