(hive) 02/02: HIVE-27919: Constant reduction in CBO does not work for FROM_UNIXTIME, DATE_ADD, DATE_SUB, TO_UNIX_TIMESTAMP (Stamatis Zampetakis reviewed by Akshat Mathur, Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit 36ce858163a19e29eafe4a8d3307191bc28fc175 Author: Stamatis Zampetakis AuthorDate: Fri Dec 8 13:22:56 2023 +0100 HIVE-27919: Constant reduction in CBO does not work for FROM_UNIXTIME, DATE_ADD, DATE_SUB, TO_UNIX_TIMESTAMP (Stamatis Zampetakis reviewed by Akshat Mathur, Krisztian Kasa) Constant reduction does not work because the functions are declared as dynamic (isDynamicFunction returns true). However, the dynamic declaration is wrong because none of the above depends on context variables; they all operate on concrete parameters and require one or more inputs. Moreover, DATE_ADD, DATE_SUB, and FROM_UNIXTIME are not time functions, so it is wrong to extend the SqlAbstractTimeFunction class. The overrides in SqlAbstractTimeFunction are not correct/relevant to these functions so the changes here address this as well. Overview of the changes: 1. Turn DATE_ADD, DATE_SUB, and FROM_UNIXTIME into regular SqlFunctions and pass the correct return type inference strategy. The operand type inference and type checker can remain null as they were before since they are not used currently in Hive. 2. Change the type family for FROM_UNIXTIME to reflect that the function returns a string (and not date or time). 3. Create and pass an appropriate operand checker for FROM_UNIXTIME (minor since it's not used at the moment). 4. Remove isDynamicFunction override from TO_UNIX_TIMESTAMP (which is wrong), to enable constant reduction. 5. Finalize classes and make them non-instantiable. Constant reduction in CBO allows some further optimizations to kick in, such as the removal of the (always true) filter operator in constant_prop_coalesce.q.out and the transformation to dynamic partition hash join (DPHJ) in tez_dynpart_hashjoin_4.q.out. 
Note, that without the changes here the DPHJ transformation for the query in tez_dynpart_hashjoin_4.q fails due to inconsistencies on the way constant folding is performed at the Operator (physical) layer (HIVE-27658). Close apache/hive#4932 --- .../reloperators/HiveDateAddSqlOperator.java | 15 +-- .../reloperators/HiveDateSubSqlOperator.java | 15 +-- .../reloperators/HiveFromUnixTimeSqlOperator.java | 29 -- .../HiveToUnixTimestampSqlOperator.java| 12 +-- .../queries/clientpositive/cbo_constantfolding.q | 5 + .../clientpositive/llap/cbo_constantfolding.q.out | 60 .../llap/constant_prop_coalesce.q.out | 10 +- .../llap/tez_dynpart_hashjoin_4.q.out | 101 ++--- 8 files changed, 165 insertions(+), 82 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateAddSqlOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateAddSqlOperator.java index af9b12ee6f7..ef865e4d22c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateAddSqlOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateAddSqlOperator.java @@ -18,13 +18,16 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; -import org.apache.calcite.sql.fun.SqlAbstractTimeFunction; -import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.ReturnTypes; -public class HiveDateAddSqlOperator extends SqlAbstractTimeFunction { - public static final HiveDateAddSqlOperator INSTANCE = new HiveDateAddSqlOperator(); +public final class HiveDateAddSqlOperator { + public static final SqlFunction INSTANCE = + new SqlFunction("DATE_ADD", SqlKind.OTHER_FUNCTION, ReturnTypes.DATE_NULLABLE, null, null, + SqlFunctionCategory.TIMEDATE); - protected HiveDateAddSqlOperator() { -super("DATE_ADD", 
SqlTypeName.DATE); + private HiveDateAddSqlOperator() { } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateSubSqlOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateSubSqlOperator.java index 4f737126f02..d1c00211189 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateSubSqlOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveDateSubSqlOperator.java @@ -18,13 +18,16 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; -import org.apache.calcite.sql.fun.SqlAbstractTimeFunction; -import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.SqlFunction; +import
(hive) 01/02: HIVE-27963: Build failure when license-maven-plugin downloads bsd-license.php (Akshat Mathur reviewed by Stamatis Zampetakis, Ayush Saxena)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit 6671e1a91f288d8552840a2517912e5820d0204b Author: akshat0395 AuthorDate: Wed Dec 20 14:01:53 2023 +0530 HIVE-27963: Build failure when license-maven-plugin downloads bsd-license.php (Akshat Mathur reviewed by Stamatis Zampetakis, Ayush Saxena) 1. Add BSD-2-CLAUSE in licenseUrlFileNames patterns to unify downloaded files and avoid the build failure. 2. Upgrade plugin version from 2.1.0 to 2.3.0 (unrelated to the failure but still beneficial) Close apache/hive#4963 --- packaging/pom.xml | 4 pom.xml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/packaging/pom.xml b/packaging/pom.xml index 274dcc7d08b..9a25658475d 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -158,6 +158,10 @@ \Qhttps://opensource.org/licenses/Apache-2.0\E \Qhttp://www.apache.org/licenses/\E + + https?://(www\.)?opensource.org/licenses/bsd-license.php + https?://(www\.)?opensource.org/licenses/BSD-2-Clause + https?://(www\.)?opensource.org/licenses/BSD-3-Clause diff --git a/pom.xml b/pom.xml index 62ba8c78d61..b6959e10cee 100644 --- a/pom.xml +++ b/pom.xml @@ -100,7 +100,7 @@ 3.5.0 3.0.0-M4 2.7.10 -2.1.0 +2.3.0 1.10.1 1.10.13
(hive) branch master updated (f265cc25905 -> 36ce858163a)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git from f265cc25905 HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy) new 6671e1a91f2 HIVE-27963: Build failure when license-maven-plugin downloads bsd-license.php (Akshat Mathur reviewed by Stamatis Zampetakis, Ayush Saxena) new 36ce858163a HIVE-27919: Constant reduction in CBO does not work for FROM_UNIXTIME, DATE_ADD, DATE_SUB, TO_UNIX_TIMESTAMP (Stamatis Zampetakis reviewed by Akshat Mathur, Krisztian Kasa) The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: packaging/pom.xml | 4 + pom.xml| 2 +- .../reloperators/HiveDateAddSqlOperator.java | 15 +-- .../reloperators/HiveDateSubSqlOperator.java | 15 +-- .../reloperators/HiveFromUnixTimeSqlOperator.java | 29 -- .../HiveToUnixTimestampSqlOperator.java| 12 +-- .../queries/clientpositive/cbo_constantfolding.q | 5 + .../clientpositive/llap/cbo_constantfolding.q.out | 60 .../llap/constant_prop_coalesce.q.out | 10 +- .../llap/tez_dynpart_hashjoin_4.q.out | 101 ++--- 10 files changed, 170 insertions(+), 83 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/cbo_constantfolding.q create mode 100644 ql/src/test/results/clientpositive/llap/cbo_constantfolding.q.out
(hive) branch master updated: HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy)
This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f265cc25905 HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy) f265cc25905 is described below commit f265cc25905d0bdbdc65a16720e33fb21ee79da9 Author: Ramesh Kumar AuthorDate: Wed Dec 20 01:02:36 2023 -0800 HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 6 +- .../queries/clientpositive/groupby_sort_2_23.q | 10 + .../results/clientpositive/cbo_rp_auto_join1.q.out | 457 ++--- .../llap/auto_sortmerge_join_10.q.out | 295 - .../clientpositive/llap/bucket_groupby.q.out | 89 +++- .../clientpositive/llap/groupby_sort_2_23.q.out| 180 6 files changed, 779 insertions(+), 258 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 714df4c22a9..1fa63ae3821 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2019,10 +2019,10 @@ public class HiveConf extends Configuration { HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + "common group by keys, it will be optimized to generate single M/R job."), -HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true, +HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false, "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + -"the group by in the mapper by using BucketizedHiveInputFormat. 
The only downside to this\n" + -"is that it limits the number of mappers to the number of files."), +"the group by in the mapper by using BucketizedHiveInputFormat. This can only work if the number of files to be\n" + +"processed is exactly 1. The downside to this is that it limits the number of mappers to the number of files."), HIVE_DEFAULT_NULLS_LAST("hive.default.nulls.last", true, "Whether to set NULLS LAST as the default null ordering for ASC order and " + "NULLS FIRST for DESC order."), diff --git a/ql/src/test/queries/clientpositive/groupby_sort_2_23.q b/ql/src/test/queries/clientpositive/groupby_sort_2_23.q new file mode 100644 index 000..b241bee6855 --- /dev/null +++ b/ql/src/test/queries/clientpositive/groupby_sort_2_23.q @@ -0,0 +1,10 @@ +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.explain.user=false; + +create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC; +insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2'); +insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2'); + +explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1; +select age, name, count(*) from test_bucket group by age, name having count(*) > 1; diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index 8f3788d40fa..5bdf0edc2b4 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -92,8 +92,10 @@ POSTHOOK: Input: default@tbl2_n12 A masked pattern was here STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: 
Stage-3 STAGE PLANS: Stage: Stage-1 @@ -112,49 +114,53 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator -expressions: _col0 (type: int), _col1 (type: bigint) -outputColumnNames: key, $f1 + Reduce Output