Repository: hive Updated Branches: refs/heads/master 5861b6af5 -> 41f72dc3e
HIVE-16804: Semijoin hint : Needs support for target table. (Deepak Jaiswal, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41f72dc3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41f72dc3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41f72dc3 Branch: refs/heads/master Commit: 41f72dc3eda0e2744ea3787560ef12ec1d994038 Parents: 5861b6a Author: Jason Dere <[email protected]> Authored: Thu Jun 8 10:11:35 2017 -0700 Committer: Jason Dere <[email protected]> Committed: Thu Jun 8 10:11:35 2017 -0700 ---------------------------------------------------------------------- .../DynamicPartitionPruningOptimization.java | 5 ++ .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 22 +++-- .../hadoop/hive/ql/parse/SemiJoinHint.java | 8 +- .../hadoop/hive/ql/parse/TezCompiler.java | 16 ++-- .../test/queries/clientpositive/semijoin_hint.q | 32 +++---- .../clientpositive/llap/semijoin_hint.q.out | 91 ++++++++------------ 6 files changed, 89 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 8a62982..562caf9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -226,6 +226,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { if (tabAliasBuilder.length() > 0) { tableAlias = tabAliasBuilder.toString(); } else { + //falling back Operator<?> op = ctx.generator; while (!(op == null || op instanceof TableScanOperator)) { @@ -361,6 +362,10 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { if (!colName.equals(sjHint.getColName())) { continue; } + if (!ts.getConf().getAlias().equals(sjHint.getTarget())) { + continue; + } + // match! LOG.info("Creating runtime filter due to user hint: column = " + colName); if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias, http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d514644..9e84a29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9034,8 +9034,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { /** Parses semjoin hints in the query and returns the table names mapped to filter size, or -1 if not specified. * Hints can be in 2 formats - * 1. TableName, ColumnName, bloom filter entries - * 2. TableName, ColumnName + * 1. TableName, ColumnName, Target-TableName, bloom filter entries + * 2. TableName, ColumnName, Target-TableName * */ private Map<String, List<SemiJoinHint>> parseSemiJoinHint(List<ASTNode> hints) throws SemanticException { if (hints == null || hints.size() == 0) return null; @@ -9071,15 +9071,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { throws SemanticException { // Check if there are enough entries in the tree to constitute a hint. int numEntriesLeft = args.getChildCount() - curIdx; - if (numEntriesLeft < 2) { + if (numEntriesLeft < 3) { throw new SemanticException("User provided only 1 entry for the hint with alias " + args.getChild(curIdx).getText()); } - String alias = args.getChild(curIdx++).getText(); + String source = args.getChild(curIdx++).getText(); // validate - if (StringUtils.isNumeric(alias)) { - throw new SemanticException("User provided bloom filter entries when alias is expected"); + if (StringUtils.isNumeric(source)) { + throw new SemanticException("User provided bloom filter entries when source alias is expected"); } String colName = args.getChild(curIdx++).getText(); @@ -9088,8 +9088,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { throw new SemanticException("User provided bloom filter entries when column name is expected"); } + String target = args.getChild(curIdx++).getText(); + // validate + if (StringUtils.isNumeric(colName)) { + throw new SemanticException("User provided bloom filter entries when target alias is expected"); + } + Integer number = null; - if (numEntriesLeft > 2) { + if (numEntriesLeft > 3) { // Check if there exists bloom filter size entry try { number = Integer.parseInt(args.getChild(curIdx).getText()); @@ -9097,7 +9103,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } catch (NumberFormatException e) { // Ignore } } - result.computeIfAbsent(alias, value -> new ArrayList<>()).add(new SemiJoinHint(colName, number)); + result.computeIfAbsent(source, value -> new ArrayList<>()).add(new SemiJoinHint(colName, target, number)); return curIdx; } http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java index f7fd306..b2c123f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java @@ -20,17 +20,21 @@ package org.apache.hadoop.hive.ql.parse; public class SemiJoinHint { private String colName; + private String target; private Integer numEntries; - public SemiJoinHint(String colName, Integer numEntries) { + public SemiJoinHint(String colName, String target, Integer numEntries) { this.colName = colName; + this.target = target; this.numEntries = numEntries; } public String getColName() { return colName; } - + public String getTarget() { + return target; + } public Integer getNumEntries() { return numEntries != null ? numEntries : -1; } http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 7e156f6..20f16fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -743,15 +743,15 @@ public class TezCompiler extends TaskCompiler { SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs); if (sjInfo != null && ts == sjInfo.getTsOp()) { // match! + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts); + } if (LOG.isDebugEnabled()) { LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(pctx, rs, ts); - if (sjInfo.getIsHint()) { - LOG.debug("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts); - } } } } @@ -848,15 +848,15 @@ public class TezCompiler extends TaskCompiler { if (parent == ts) { // We have a cycle! + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts); + } if (LOG.isDebugEnabled()) { LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); - if (sjInfo.getIsHint()) { - LOG.debug("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts); - } } } } @@ -895,6 +895,10 @@ public class TezCompiler extends TaskCompiler { long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); if (expectedEntries == -1 || expectedEntries > pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin due to lack to stats" + + " or exceeding max bloom filter entries"); + } // Remove the semijoin optimization branch along with ALL the mappings // The parent GB2 has all the branches. Collect them and remove them. for (Operator<?> op : gbOp.getChildOperators()) { http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/test/queries/clientpositive/semijoin_hint.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q index 71fa445..2a15344 100644 --- a/ql/src/test/queries/clientpositive/semijoin_hint.q +++ b/ql/src/test/queries/clientpositive/semijoin_hint.q @@ -49,51 +49,51 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k -- Skip semijoin by using keyword "None" as argument explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); +EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- This should NOT create a semijoin -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); set hive.cbo.returnpath.hiveop=false; -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- Skip semijoin by using keyword "None" as argument explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); +EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- This should NOT create a semijoin -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); set hive.cbo.enable=false; -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- Skip semijoin by using keyword "None" as argument explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); +EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- This should NOT create a semijoin -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); http://git-wip-us.apache.org/repos/asf/hive/blob/41f72dc3/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index ae9bf9b..76c985e 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -387,9 +387,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -552,9 +552,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -566,7 +566,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 5 <- Reducer 4 (BROADCAST_EDGE) - Map 6 <- Reducer 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) @@ -629,10 +628,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + predicate: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -695,10 +694,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -706,9 +701,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -833,9 +828,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -933,13 +928,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +PREHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +POSTHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1390,9 +1385,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1553,9 +1548,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1567,7 +1562,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 5 <- Reducer 4 (BROADCAST_EDGE) - Map 6 <- Reducer 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) @@ -1630,10 +1624,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + predicate: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1694,10 +1688,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1705,9 +1695,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1830,9 +1820,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1928,13 +1918,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +PREHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +POSTHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2353,9 +2343,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2504,9 +2494,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2517,7 +2507,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) Map 4 <- Reducer 6 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) @@ -2528,10 +2517,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + predicate: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2633,10 +2622,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -2644,9 +2629,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2761,9 +2746,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage
