This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7d467160e62 HIVE-28729: Apply nulls order setting in Reduce Sink
operator of join branches (Krisztian Kasa, reviewed by Stamatis Zampetakis)
7d467160e62 is described below
commit 7d467160e623a187973921fa9658a31ff410308c
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Feb 4 15:39:26 2025 +0100
HIVE-28729: Apply nulls order setting in Reduce Sink operator of join
branches (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
.../calcite/rules/HiveInsertExchange4JoinRule.java | 19 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../queries/clientpositive/cbo_rp_null_order.q | 12 ++
.../clientpositive/llap/cbo_rp_null_order.q.out | 224 +++++++++++++++++++++
4 files changed, 245 insertions(+), 15 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java
index 0c8c5e1a8ed..08107adda2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java
@@ -30,8 +30,6 @@
import org.apache.calcite.rel.core.Exchange;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rex.RexNode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
@@ -56,20 +54,13 @@
*/
public class HiveInsertExchange4JoinRule extends RelOptRule {
- protected static transient final Logger LOG = LoggerFactory
- .getLogger(HiveInsertExchange4JoinRule.class);
+ private final RelFieldCollation.NullDirection defaultAscNullDirection;
- /** Rule that creates Exchange operators under a MultiJoin operator. */
- public static final HiveInsertExchange4JoinRule EXCHANGE_BELOW_MULTIJOIN =
- new HiveInsertExchange4JoinRule(HiveMultiJoin.class);
-
- /** Rule that creates Exchange operators under a Join operator. */
- public static final HiveInsertExchange4JoinRule EXCHANGE_BELOW_JOIN =
- new HiveInsertExchange4JoinRule(Join.class);
-
- public HiveInsertExchange4JoinRule(Class<? extends RelNode> clazz) {
+ public HiveInsertExchange4JoinRule(
+ Class<? extends RelNode> clazz, RelFieldCollation.NullDirection defaultAscNullDirection) {
// match multijoin or join
super(RelOptRule.operand(clazz, any()));
+ this.defaultAscNullDirection = defaultAscNullDirection;
}
@Override
@@ -118,7 +109,7 @@ public void onMatch(RelOptRuleCall call) {
for (int pos : joinLeafPredInfo.getProjsJoinKeysInChildSchema(i)) {
if (!joinKeyPositions.contains(pos)) {
joinKeyPositions.add(pos);
- collationListBuilder.add(new RelFieldCollation(pos));
+ collationListBuilder.add(new RelFieldCollation(pos, RelFieldCollation.Direction.ASCENDING, defaultAscNullDirection));
}
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c662417b371..f2e17993da6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -84,6 +84,7 @@
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.core.TableScan;
@@ -2416,7 +2417,9 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv
// 9.2. Introduce exchange operators below join/multijoin operators
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
- HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
+ new HiveInsertExchange4JoinRule(Join.class, NullOrdering.defaultNullOrder(conf).getDirection()),
+ new HiveInsertExchange4JoinRule(
+ HiveMultiJoin.class, NullOrdering.defaultNullOrder(conf).getDirection()));
} else {
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
HiveProjectSortExchangeTransposeRule.INSTANCE,
HiveProjectMergeRule.INSTANCE);
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_null_order.q b/ql/src/test/queries/clientpositive/cbo_rp_null_order.q
new file mode 100644
index 00000000000..a2ff1e2be7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_null_order.q
@@ -0,0 +1,12 @@
+SET hive.cbo.returnpath.hiveop=true;
+SET hive.default.nulls.last=false;
+
+CREATE TABLE t1(key int, value string);
+
+EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
+EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
+
+SET hive.default.nulls.last=true;
+
+EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
+EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_null_order.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_null_order.q.out
new file mode 100644
index 00000000000..071f824365f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_rp_null_order.q.out
@@ -0,0 +1,224 @@
+PREHOOK: query: CREATE TABLE t1(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE t1(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key =
b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not
available])
+ HiveSortExchange(distribution=[hash[0]], collation=[[0 ASC-nulls-first]])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], qbid:alias=[a])
+ HiveSortExchange(distribution=[hash[0]], collation=[[0 ASC-nulls-first]])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], qbid:alias=[b])
+
+PREHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: key, value, key0, value0
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key =
b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not
available])
+ HiveSortExchange(distribution=[hash[0]], collation=[[0]])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], qbid:alias=[a])
+ HiveSortExchange(distribution=[hash[0]], collation=[[0]])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], qbid:alias=[b])
+
+PREHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 1 Data size: 188 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: key, value, key0, value0
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+