HIVE-18875 : Enable SMB Join by default in Tez (Deepak Jaiswal, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0a961aa8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0a961aa8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0a961aa8 Branch: refs/heads/master Commit: 0a961aa8fd2187e901ec071ea9ddc3ab4d4970c5 Parents: 773034f Author: Deepak Jaiswal <djais...@apache.org> Authored: Sun Jun 10 00:25:34 2018 -0700 Committer: Deepak Jaiswal <djais...@apache.org> Committed: Sun Jun 10 17:55:30 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/exec/CommonMergeJoinOperator.java | 31 +- .../hadoop/hive/ql/exec/GroupByOperator.java | 2 +- .../apache/hadoop/hive/ql/exec/Operator.java | 12 + .../hive/ql/exec/tez/ReduceRecordSource.java | 10 + .../hive/ql/optimizer/ConvertJoinMapJoin.java | 17 + .../annotation/OpTraitsRulesProcFactory.java | 41 +- .../clientpositive/auto_sortmerge_join_11.q | 1 + .../clientpositive/auto_sortmerge_join_6.q | 4 +- .../test/queries/clientpositive/skewjoinopt19.q | 1 + .../test/queries/clientpositive/skewjoinopt20.q | 1 + .../queries/clientpositive/smb_mapjoin_11.q | 3 +- .../queries/clientpositive/smb_mapjoin_12.q | 2 +- .../queries/clientpositive/smb_mapjoin_17.q | 1 + .../queries/clientpositive/subquery_notin.q | 2 +- .../llap/auto_sortmerge_join_6.q.out | 150 ++-- .../llap/correlationoptimizer2.q.out | 450 +++++------- .../llap/correlationoptimizer6.q.out | 170 ++--- .../clientpositive/llap/explainuser_1.q.out | 143 ++-- .../clientpositive/llap/limit_pushdown.q.out | 83 +-- .../results/clientpositive/llap/mergejoin.q.out | 715 ++++--------------- .../test/results/clientpositive/llap/mrr.q.out | 132 ++-- .../llap/offset_limit_ppd_optimizer.q.out | 85 +-- .../results/clientpositive/llap/smb_cache.q.out | 50 +- .../clientpositive/llap/smb_mapjoin_14.q.out | 644 ++++++----------- .../clientpositive/llap/smb_mapjoin_15.q.out | 101 +-- .../clientpositive/llap/smb_mapjoin_4.q.out | 544 ++++---------- .../clientpositive/llap/smb_mapjoin_5.q.out | 544 ++++---------- .../clientpositive/llap/smb_mapjoin_6.q.out | 415 ++++------- .../llap/subquery_in_having.q.out | 160 ++--- .../clientpositive/llap/subquery_notin.q.out | 124 ++-- .../llap/vectorized_bucketmapjoin1.q.out | 307 ++------ .../clientpositive/spark/bucketmapjoin1.q.out | 210 ++---- .../clientpositive/spark/smb_mapjoin_14.q.out | 711 +++++------------- .../clientpositive/spark/smb_mapjoin_15.q.out | 399 ++--------- .../clientpositive/spark/smb_mapjoin_4.q.out | 640 ++++------------- .../clientpositive/spark/smb_mapjoin_5.q.out | 640 ++++------------- .../clientpositive/spark/smb_mapjoin_6.q.out | 331 +++------ .../clientpositive/spark/subquery_notin.q.out | 8 +- 40 files changed, 2329 insertions(+), 5558 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 87db42a..1e8a389 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2076,7 +2076,7 @@ public class HiveConf extends Configuration { HIVE_ENFORCE_NOT_NULL_CONSTRAINT("hive.constraint.notnull.enforce", true, "Should \"IS NOT NULL \" constraint be enforced?"), - HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false, + HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", true, "Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."), HIVE_AUTO_SORTMERGE_JOIN_REDUCE("hive.auto.convert.sortmerge.join.reduce.side", true, "Whether hive.auto.convert.sortmerge.join (if enabled) should be applied to reduce side."), http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 0f13d62..c71c540 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -701,6 +701,7 @@ minillaplocal.query.files=\ tez_smb_1.q,\ tez_smb_empty.q,\ tez_smb_main.q,\ + tez_smb_reduce_side.q,\ tez_union.q,\ tez_union2.q,\ tez_union_decimal.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java index aefaa05..8f98ace 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java @@ -25,6 +25,8 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; + +import org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -148,7 +150,7 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge for (byte pos = 0; pos < order.length; pos++) { if (pos != posBigTable) { - if ((parentOperators != null) && (parentOperators.isEmpty() == false) + if ((parentOperators != null) && !parentOperators.isEmpty() && (parentOperators.get(pos) instanceof TezDummyStoreOperator)) { TezDummyStoreOperator dummyStoreOp = (TezDummyStoreOperator) parentOperators.get(pos); fetchDone[pos] = dummyStoreOp.getFetchDone(); @@ -161,6 +163,15 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge sources = ((TezContext) MapredContext.get()).getRecordSources(); interruptChecker = new InterruptibleProcessing(); + + if (sources[0] instanceof ReduceRecordSource && + parentOperators != null && !parentOperators.isEmpty()) { + // Tell ReduceRecordSource to flush last record as this is a reduce + // side SMB + for (RecordSource source : sources) { + ((ReduceRecordSource) source).setFlushLastRecord(true); + } + } } /* @@ -230,7 +241,7 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge continue; } - if (foundNextKeyGroup[i] == false) { + if (!foundNextKeyGroup[i]) { canEmit = false; break; } @@ -258,13 +269,12 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge // catch up with the big table. if (nextKeyGroup) { assert tag == posBigTable; - List<Byte> smallestPos = null; + List<Byte> listOfFetchNeeded = null; do { - smallestPos = joinOneGroup(); + listOfFetchNeeded = joinOneGroup(); //jump out the loop if we need input from the big table - } while (smallestPos != null && smallestPos.size() > 0 - && !smallestPos.contains(this.posBigTable)); - + } while (listOfFetchNeeded != null && listOfFetchNeeded.size() > 0 + && !listOfFetchNeeded.contains(this.posBigTable)); return; } @@ -360,6 +370,9 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge } private void fetchNextGroup(Byte t) throws HiveException { + if (keyWritables[t] != null) { + return; // First process the current key. + } if (foundNextKeyGroup[t]) { // first promote the next group to be the current group if we reached a // new group in the previous fetch @@ -530,6 +543,10 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge } else { int cmp = compareKeys(alias, key, keyWritable); if (cmp != 0) { + // Cant overwrite existing keys + if (nextKeyWritables[alias] != null) { + throw new HiveException("Attempting to overwrite nextKeyWritables[" + alias + "]"); + } nextKeyWritables[alias] = key; return true; } http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 4b76638..4882e61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -734,7 +734,7 @@ public class GroupByOperator extends Operator<GroupByDesc> implements IConfigure @Override public void process(Object row, int tag) throws HiveException { firstRow = false; - ObjectInspector rowInspector = inputObjInspectors[tag]; + ObjectInspector rowInspector = inputObjInspectors[0]; // Total number of input rows is needed for hash aggregation only if (hashAggr) { numRowsInput++; http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 108bb57..4e9784d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -663,6 +663,18 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C public void flush() throws HiveException { } + // Recursive flush to flush all the tree operators + public void flushRecursive() throws HiveException { + flush(); + if (childOperators == null) { + return; + } + + for (Operator<?> child : childOperators) { + child.flushRecursive(); + } + } + public void processGroup(int tag) throws HiveException { if (childOperators == null || childOperators.isEmpty()) { return; http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index fca783c..688fde8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -123,6 +123,9 @@ public class ReduceRecordSource implements RecordSource { private long vectorizedVertexNum; private int vectorizedTestingReducerBatchSize; + // Flush the last record when reader is out of records + private boolean flushLastRecord = false; + void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, @@ -254,6 +257,9 @@ public class ReduceRecordSource implements RecordSource { try { if (!reader.next()) { + if (flushLastRecord) { + reducer.flushRecursive(); + } return false; } @@ -508,4 +514,8 @@ public class ReduceRecordSource implements RecordSource { public ObjectInspector getObjectInspector() { return rowObjectInspector; } + + public void setFlushLastRecord(boolean flushLastRecord) { + this.flushLastRecord = flushLastRecord; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 4019f13..9a7b1ea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -608,6 +608,23 @@ public class ConvertJoinMapJoin implements NodeProcessor { return false; } ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp; + List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols(); + + // For SMB, the key column(s) in RS should be same as bucket column(s) and sort column(s)` + List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0); + List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0); + if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) { + return false; + } + + // Check columns. + for (int i = 0; i < sortCols.size(); i++) { + ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i)); + ExprNodeDesc bucketCol = rsOp.getColumnExprMap().get(bucketCols.get(i)); + if (!(sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i)))) { + return false; + } + } if (!checkColEquality(rsOp.getParentOperators().get(0).getOpTraits().getSortCols(), rsOp .getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) { http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 9e54465..dbcbbfd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -260,23 +260,43 @@ public class OpTraitsRulesProcFactory { public static class SelectRule implements NodeProcessor { + boolean processSortCols = false; + + // For bucket columns + // If all the columns match to the parent, put them in the bucket cols + // else, add empty list. + // For sort columns + // Keep the subset of all the columns as long as order is maintained. public List<List<String>> getConvertedColNames( List<List<String>> parentColNames, SelectOperator selOp) { - List<List<String>> listBucketCols = new ArrayList<List<String>>(); + List<List<String>> listBucketCols = new ArrayList<>(); if (selOp.getColumnExprMap() != null) { if (parentColNames != null) { for (List<String> colNames : parentColNames) { - List<String> bucketColNames = new ArrayList<String>(); + List<String> bucketColNames = new ArrayList<>(); + boolean found = false; for (String colName : colNames) { for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) { - if (entry.getValue() instanceof ExprNodeColumnDesc) { - if (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName)) { - bucketColNames.add(entry.getKey()); - } + if ((entry.getValue() instanceof ExprNodeColumnDesc) && + (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) { + bucketColNames.add(entry.getKey()); + found = true; + break; } } + if (!found) { + // Bail out on first missed column. + break; + } + } + if (!processSortCols && !found) { + // While processing bucket columns, atleast one bucket column + // missed. This results in a different bucketing scheme. + // Add empty list + listBucketCols.add(new ArrayList<>()); + } else { + listBucketCols.add(bucketColNames); } - listBucketCols.add(bucketColNames); } } } @@ -300,6 +320,7 @@ public class OpTraitsRulesProcFactory { List<List<String>> parentSortColNames = selOp.getParentOperators().get(0).getOpTraits().getSortCols(); if (parentSortColNames != null) { + processSortCols = true; listSortCols = getConvertedColNames(parentSortColNames, selOp); } } @@ -309,7 +330,11 @@ public class OpTraitsRulesProcFactory { int bucketingVersion = -1; OpTraits parentOpTraits = selOp.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { - numBuckets = parentOpTraits.getNumBuckets(); + // if bucket columns are empty, then numbuckets must be set to -1. + if (listBucketCols != null && + !(listBucketCols.isEmpty() || listBucketCols.get(0).isEmpty())) { + numBuckets = parentOpTraits.getNumBuckets(); + } numReduceSinks = parentOpTraits.getNumReduceSinks(); bucketingVersion = parentOpTraits.getBucketingVersion(); } http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q index 7416eb0..76e615f 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q @@ -22,6 +22,7 @@ load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO set hive.auto.convert.join=true; -- disable hash joins set hive.auto.convert.join.noconditionaltask.size=10; +set hive.auto.convert.sortmerge.join=false; explain extended select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key; select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q index 551e5f7..0aeec0e 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q @@ -53,7 +53,7 @@ select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.k explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; --- A SMB join is being followed by a regular join on a bucketed table on a different key +-- The join order ensures there is no SMB explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; @@ -71,6 +71,6 @@ select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.k explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; --- A SMB join is being followed by a regular join on a bucketed table on a different key +-- The join order ensures there is no SMB explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/skewjoinopt19.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt19.q b/ql/src/test/queries/clientpositive/skewjoinopt19.q index 02cadda..df8ab71 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt19.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt19.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; +set hive.auto.convert.sortmerge.join=false; CREATE TABLE T1_n34(key STRING, val STRING) CLUSTERED BY (key) INTO 4 BUCKETS http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/skewjoinopt20.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt20.q b/ql/src/test/queries/clientpositive/skewjoinopt20.q index 160e5b8..bebe007 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt20.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt20.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; +set hive.auto.convert.sortmerge.join=false; CREATE TABLE T1_n103(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q index 6ce49b8..d0cea5b 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q @@ -8,7 +8,8 @@ set hive.cbo.enable=false; set hive.exec.reducers.max = 1; set hive.merge.mapfiles=false; -set hive.merge.mapredfiles=false; +set hive.merge.mapredfiles=false; +set hive.auto.convert.sortmerge.join=false; -- This test verifies that the output of a sort merge join on 2 partitions (one on each side of the join) is bucketed http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_12.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q index 753e4d3..6f9ecab 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q @@ -21,7 +21,7 @@ INSERT OVERWRITE TABLE test_table2_n6 PARTITION (ds = '2') SELECT * INSERT OVERWRITE TABLE test_table2_n6 PARTITION (ds = '3') SELECT *; - +set hive.auto.convert.sortmerge.join=false; -- Create a bucketed table CREATE TABLE test_table3_n4 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS; http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_17.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_17.q b/ql/src/test/queries/clientpositive/smb_mapjoin_17.q index d68f5f3..7454445 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_17.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_17.q @@ -43,6 +43,7 @@ SELECT * FROM src WHERE key < 10; INSERT OVERWRITE TABLE test_table8 SELECT * FROM src WHERE key < 10; +set hive.auto.convert.sortmerge.join=false; -- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables EXPLAIN SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/subquery_notin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q index 6494027..a2d93df 100644 --- a/ql/src/test/queries/clientpositive/subquery_notin.q +++ b/ql/src/test/queries/clientpositive/subquery_notin.q @@ -177,7 +177,7 @@ SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0); -- corr explain SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1); -SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c1=t2_n0.c1); +SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1); DROP TABLE t1_n0; DROP TABLE t2_n0; http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out index b13beab..828c6e1 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out @@ -772,23 +772,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -798,23 +788,36 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan + alias: c + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -838,6 +841,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 _col0 (type: int) 1 _col0 (type: int) Statistics: Num rows: 1343 Data size: 10744 Basic stats: COMPLETE Column stats: COMPLETE @@ -850,7 +869,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -884,7 +903,7 @@ POSTHOOK: Input: default@tbl1_n4 POSTHOOK: Input: default@tbl2_n3 POSTHOOK: Input: default@tbl4 #### A masked pattern was here #### -90 +2654 PREHOOK: query: explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value @@ -1295,23 +1314,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1321,23 +1330,36 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan + alias: c + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1361,6 +1383,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 _col0 (type: int) 1 _col0 (type: int) Statistics: Num rows: 1343 Data size: 10744 Basic stats: COMPLETE Column stats: COMPLETE @@ -1373,7 +1411,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1407,4 +1445,4 @@ POSTHOOK: Input: default@tbl1_n4 POSTHOOK: Input: default@tbl2_n3 POSTHOOK: Input: default@tbl4 #### A masked pattern was here #### -90 +2654 http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index 8e17d95..879c999 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -21,10 +21,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -49,7 +47,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: y @@ -72,45 +70,43 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -125,21 +121,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -189,10 +170,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -217,7 +196,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: y @@ -240,45 +219,43 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -293,21 +270,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -357,10 +319,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -386,7 +346,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: y @@ -406,45 +366,43 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -459,21 +417,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -523,10 +466,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -552,7 +493,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: y @@ -572,45 +513,43 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -625,21 +564,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -689,10 +613,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -714,7 +636,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: y @@ -737,8 +659,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -746,37 +667,36 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -791,21 +711,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -855,10 +760,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -880,7 +783,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: y @@ -903,8 +806,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -912,37 +814,36 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -957,21 +858,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out index 9e424c2..1fcd6ed 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out @@ -2659,11 +2659,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2688,7 +2686,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: y @@ -2710,7 +2708,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: z @@ -2731,44 +2729,42 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), sum(_col3) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2783,7 +2779,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2805,21 +2801,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2913,11 +2894,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2942,7 +2921,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: y @@ -2964,7 +2943,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: z @@ -2985,44 +2964,42 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), sum(_col3) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3037,7 +3014,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -3059,21 +3036,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator