This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 33b02ebb317 HIVE-27124: Disable ANTI join transformation by default (Steve Carlin reviewed by Stamatis Zampetakis) 33b02ebb317 is described below commit 33b02ebb317f261bfb2a9a6dde740fbe565c6946 Author: Steve Carlin <scar...@cloudera.com> AuthorDate: Tue Mar 7 12:51:13 2023 -0800 HIVE-27124: Disable ANTI join transformation by default (Steve Carlin reviewed by Stamatis Zampetakis) Minimize the risk of having wrong results, such as HIVE-26659, by disabling the ANTI join transformation by default. Closes #4101 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 7 +++++-- ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q | 1 + ql/src/test/queries/clientpositive/empty_result_outerjoin.q | 1 + ql/src/test/queries/clientpositive/explainuser_1.q | 3 ++- ql/src/test/queries/clientpositive/lineage3.q | 1 + ql/src/test/queries/clientpositive/perf/cbo_query16.q | 1 + ql/src/test/queries/clientpositive/perf/cbo_query69.q | 1 + ql/src/test/queries/clientpositive/perf/cbo_query78.q | 1 + ql/src/test/queries/clientpositive/perf/cbo_query94.q | 1 + ql/src/test/queries/clientpositive/perf/query16.q | 1 + ql/src/test/queries/clientpositive/perf/query69.q | 1 + ql/src/test/queries/clientpositive/perf/query78.q | 1 + ql/src/test/queries/clientpositive/perf/query94.q | 1 + .../clientpositive/subquery_complex_correlation_predicates.q | 4 +++- ql/src/test/queries/clientpositive/subquery_corr_join.q | 3 ++- ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q | 1 + ql/src/test/queries/clientpositive/subquery_in_having.q | 1 + ql/src/test/queries/clientpositive/subquery_join_rewrite.q | 2 ++ ql/src/test/queries/clientpositive/subquery_multi.q | 1 + ql/src/test/queries/clientpositive/subquery_notexists.q | 1 + ql/src/test/queries/clientpositive/subquery_notexists_having.q | 3 ++- 21 files changed, 31 insertions(+), 6 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9376348a3e8..9f61c132bde 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2364,8 +2364,11 @@ public class HiveConf extends Configuration { "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. \n" + "If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the\n" + "specified size, the join is directly converted to a mapjoin (there is no conditional task)."), - HIVE_CONVERT_ANTI_JOIN("hive.auto.convert.anti.join", true, - "Whether Hive enables the optimization about converting join with null filter to anti join"), + HIVE_CONVERT_ANTI_JOIN("hive.auto.convert.anti.join", false, + "Whether Hive enables the optimization about converting join with null filter to anti join. The " + + "default is currently false because of HIVE-26659. There is the possibility of incorrect results. " + + "The incorrect results shown on testing were zeros and nulls on aggregate tpcds queries so it was " + + "fairly obvious they were incorrect. But be wary when turning on this flag."), HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size", 10000000L, "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q index 3be73fe000a..6f571a0ad18 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q @@ -4,6 +4,7 @@ set hive.explain.user=false; ; set hive.exec.reducers.max = 1; +set hive.auto.convert.anti.join=true; CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; CREATE TABLE tbl2_n4(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; diff --git a/ql/src/test/queries/clientpositive/empty_result_outerjoin.q b/ql/src/test/queries/clientpositive/empty_result_outerjoin.q index 9199529b9e4..f1f75c2774a 100644 --- a/ql/src/test/queries/clientpositive/empty_result_outerjoin.q +++ b/ql/src/test/queries/clientpositive/empty_result_outerjoin.q @@ -1,4 +1,5 @@ set hive.cli.print.header=true; +set hive.auto.convert.anti.join=true; create table t1 (a int, b varchar(10)); create table t2 (c int, d varchar(10)); diff --git a/ql/src/test/queries/clientpositive/explainuser_1.q b/ql/src/test/queries/clientpositive/explainuser_1.q index 747cc994864..8d446057d69 100644 --- a/ql/src/test/queries/clientpositive/explainuser_1.q +++ b/ql/src/test/queries/clientpositive/explainuser_1.q @@ -12,6 +12,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; set hive.explain.user=true; +set hive.auto.convert.anti.join=true; explain create table src_orc_merge_test_part_n1(key int, value string) partitioned by (ds string, ts string) stored as orc; create table src_orc_merge_test_part_n1(key int, value string) partitioned by (ds string, ts string) stored as orc; @@ -741,4 +742,4 @@ from (select * where b.value = a.value and a.key = b.key and a.value > 'val_9') ) a ; -set hive.auto.convert.anti.join=true; \ No newline at end of file +set hive.auto.convert.anti.join=true; diff --git a/ql/src/test/queries/clientpositive/lineage3.q b/ql/src/test/queries/clientpositive/lineage3.q index 8d5bd30c055..fa59ebd1242 100644 --- a/ql/src/test/queries/clientpositive/lineage3.q +++ b/ql/src/test/queries/clientpositive/lineage3.q @@ -4,6 +4,7 @@ SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger; set hive.metastore.disallow.incompatible.col.type.changes=false; +set hive.auto.convert.anti.join=true; drop table if exists d1; create table d1(a int); diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query16.q b/ql/src/test/queries/clientpositive/perf/cbo_query16.q index 74245aa10b8..5ce0b3dfb81 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query16.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query16.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query16.tpl and seed 171719422 explain cbo select diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query69.q b/ql/src/test/queries/clientpositive/perf/cbo_query69.q index 01183fba762..c08c4f404c5 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query69.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query69.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query69.tpl and seed 797269820 explain cbo select diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query78.q b/ql/src/test/queries/clientpositive/perf/cbo_query78.q index 87f94dcc4a7..dafb1e5eeae 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query78.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query78.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query78.tpl and seed 1819994127 explain cbo with ws as diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query94.q b/ql/src/test/queries/clientpositive/perf/cbo_query94.q index 4ae01bf27e5..59ba68f14b0 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query94.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query94.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 explain cbo select diff --git a/ql/src/test/queries/clientpositive/perf/query16.q b/ql/src/test/queries/clientpositive/perf/query16.q index 05625f71aad..725d849cc7e 100644 --- a/ql/src/test/queries/clientpositive/perf/query16.q +++ b/ql/src/test/queries/clientpositive/perf/query16.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query16.tpl and seed 171719422 explain select diff --git a/ql/src/test/queries/clientpositive/perf/query69.q b/ql/src/test/queries/clientpositive/perf/query69.q index ce2d19cc5d0..520b3722c3f 100644 --- a/ql/src/test/queries/clientpositive/perf/query69.q +++ b/ql/src/test/queries/clientpositive/perf/query69.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query69.tpl and seed 797269820 explain select diff --git a/ql/src/test/queries/clientpositive/perf/query78.q b/ql/src/test/queries/clientpositive/perf/query78.q index ca9e6d6cb1a..b15bb5210b4 100644 --- a/ql/src/test/queries/clientpositive/perf/query78.q +++ b/ql/src/test/queries/clientpositive/perf/query78.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query78.tpl and seed 1819994127 explain with ws as diff --git a/ql/src/test/queries/clientpositive/perf/query94.q b/ql/src/test/queries/clientpositive/perf/query94.q index 18253fa7d63..6a68e9b9e2a 100644 --- a/ql/src/test/queries/clientpositive/perf/query94.q +++ b/ql/src/test/queries/clientpositive/perf/query94.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- start query 1 in stream 0 using template query94.tpl and seed 2031708268 explain select diff --git a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q index a32eb914170..ccfed8dcb23 100644 --- a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q +++ b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q @@ -1,5 +1,7 @@ -- Test cases with subqueries having complex correlation predicates. +set hive.auto.convert.anti.join=true; + -- HIVE-24957: Wrong results when subquery has COALESCE in correlation predicate create table author ( @@ -75,4 +77,4 @@ from book b where not exists (select a_authorkey from author a - where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400)); \ No newline at end of file + where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400)); diff --git a/ql/src/test/queries/clientpositive/subquery_corr_join.q b/ql/src/test/queries/clientpositive/subquery_corr_join.q index cb6e82fde07..b20cf102974 100644 --- a/ql/src/test/queries/clientpositive/subquery_corr_join.q +++ b/ql/src/test/queries/clientpositive/subquery_corr_join.q @@ -1,3 +1,4 @@ +set hive.auto.convert.anti.join=true; create table alltypestiny( id int, int_col int, @@ -70,4 +71,4 @@ from ta where not exists (select 1 from tb inner JOIN tc - on ta.id = tb.id); \ No newline at end of file + on ta.id = tb.id); diff --git a/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q b/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q index 352f1e8fefb..3f1a2c47e5c 100644 --- a/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q +++ b/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q @@ -1,3 +1,4 @@ +set hive.auto.convert.anti.join=true; create table alltypestiny( id int, int_col int, diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q index 2861c4e3e26..acb3c23da8f 100644 --- a/ql/src/test/queries/clientpositive/subquery_in_having.q +++ b/ql/src/test/queries/clientpositive/subquery_in_having.q @@ -1,5 +1,6 @@ --! qt:dataset:src set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- SORT_QUERY_RESULTS -- data setup diff --git a/ql/src/test/queries/clientpositive/subquery_join_rewrite.q b/ql/src/test/queries/clientpositive/subquery_join_rewrite.q index 1f4ba715631..ae7c2762d2d 100644 --- a/ql/src/test/queries/clientpositive/subquery_join_rewrite.q +++ b/ql/src/test/queries/clientpositive/subquery_join_rewrite.q @@ -1,5 +1,7 @@ -- SORT_QUERY_RESULTS +set hive.auto.convert.anti.join=true; + create table web_sales (ws_order_number int, ws_warehouse_sk int) stored as orc; insert into web_sales values diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q index 7389384f20b..40e9e251ee5 100644 --- a/ql/src/test/queries/clientpositive/subquery_multi.q +++ b/ql/src/test/queries/clientpositive/subquery_multi.q @@ -3,6 +3,7 @@ --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.auto.convert.anti.join=true; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/subquery_notexists.q b/ql/src/test/queries/clientpositive/subquery_notexists.q index 741366751e7..c4707e62312 100644 --- a/ql/src/test/queries/clientpositive/subquery_notexists.q +++ b/ql/src/test/queries/clientpositive/subquery_notexists.q @@ -2,6 +2,7 @@ --! qt:dataset:part -- SORT_QUERY_RESULTS set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- no agg, corr diff --git a/ql/src/test/queries/clientpositive/subquery_notexists_having.q b/ql/src/test/queries/clientpositive/subquery_notexists_having.q index cce179a7c7b..fefb9ed4cf1 100644 --- a/ql/src/test/queries/clientpositive/subquery_notexists_having.q +++ b/ql/src/test/queries/clientpositive/subquery_notexists_having.q @@ -1,5 +1,6 @@ --! qt:dataset:src set hive.mapred.mode=nonstrict; +set hive.auto.convert.anti.join=true; -- no agg, corr @@ -45,4 +46,4 @@ having not exists from src a where b.value = a.value and a.value > 'val_12' ) -; \ No newline at end of file +;