HIVE-11954: Extend logic to choose side table in MapJoin Conversion algorithm (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e62edac Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e62edac Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e62edac Branch: refs/heads/master Commit: 8e62edac34538d38d3ba4db158481f5d7735199f Parents: 37a8fe0 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Thu Oct 22 13:43:14 2015 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Fri Oct 23 08:30:15 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/OperatorUtils.java | 21 + .../hive/ql/optimizer/ConvertJoinMapJoin.java | 99 +- .../llap/bucket_map_join_tez1.q.out | 244 ++-- .../llap/bucket_map_join_tez2.q.out | 112 +- .../llap/dynamic_partition_pruning.q.out | 130 ++- .../clientpositive/llap/explainuser_2.q.out | 1070 +++++++++--------- .../test/results/clientpositive/llap/mrr.q.out | 53 +- .../vectorized_dynamic_partition_pruning.q.out | 130 ++- .../tez/auto_sortmerge_join_10.q.out | 57 +- .../tez/auto_sortmerge_join_12.q.out | 95 +- .../tez/bucket_map_join_tez1.q.out | 236 ++-- .../tez/bucket_map_join_tez2.q.out | 108 +- .../tez/cross_product_check_2.q.out | 195 ++-- .../tez/dynamic_partition_pruning.q.out | 128 ++- .../clientpositive/tez/explainuser_2.q.out | 1070 +++++++++--------- .../test/results/clientpositive/tez/mrr.q.out | 53 +- .../clientpositive/tez/unionDistinct_1.q.out | 58 +- .../vectorized_dynamic_partition_pruning.q.out | 130 ++- 18 files changed, 2004 insertions(+), 1985 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index f00fc77..bd10912 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -277,4 +277,25 @@ public class OperatorUtils { } return resultMap.build(); } + + /** + * Given an operator and a set of classes, it returns the number of operators it finds + * upstream that instantiate any of the given classes. + * + * @param start the start operator + * @param classes the set of classes + * @return the number of operators + */ + public static int countOperatorsUpstream(Operator<?> start, Set<Class<? extends Operator<?>>> classes) { + Multimap<Class<? extends Operator<?>>, Operator<?>> ops = classifyOperatorsUpstream(start, classes); + int numberOperators = 0; + Set<Operator<?>> uniqueOperators = new HashSet<Operator<?>>(); + for (Operator<?> op : ops.values()) { + if (uniqueOperators.add(op)) { + numberOperators++; + } + } + return numberOperators; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 024849e..e63de7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -32,18 +32,22 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MuxOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.UDTFOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -61,6 +65,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.util.ReflectionUtils; +import com.google.common.collect.ImmutableSet; + /** * ConvertJoinMapJoin is an optimization that replaces a common join * (aka shuffle join) with a map join (aka broadcast or fragment replicate @@ -70,7 +76,18 @@ import org.apache.hadoop.util.ReflectionUtils; */ public class ConvertJoinMapJoin implements NodeProcessor { - static final private Log LOG = LogFactory.getLog(ConvertJoinMapJoin.class.getName()); + private static final Log LOG = LogFactory.getLog(ConvertJoinMapJoin.class.getName()); + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private static final Set<Class<? extends Operator<?>>> COSTLY_OPERATORS = + new ImmutableSet.Builder() + .add(CommonJoinOperator.class) + .add(GroupByOperator.class) + .add(LateralViewJoinOperator.class) + .add(PTFOperator.class) + .add(ReduceSinkOperator.class) + .add(UDTFOperator.class) + .build(); @Override /* @@ -538,16 +555,20 @@ public class ConvertJoinMapJoin implements NodeProcessor { HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); int bigTablePosition = -1; - + // number of costly ops (Join, GB, PTF/Windowing, TF) below the big input + int bigInputNumberCostlyOps = -1; + // stats of the big input Statistics bigInputStat = null; - long totalSize = 0; - int pos = 0; // bigTableFound means we've encountered a table that's bigger than the // max. This table is either the the big table or we cannot convert. - boolean bigTableFound = false; + boolean foundInputNotFittingInMemory = false; - for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) { + // total size of the inputs + long totalSize = 0; + + for (int pos = 0; pos < joinOp.getParentOperators().size(); pos++) { + Operator<? extends OperatorDesc> parentOp = joinOp.getParentOperators().get(pos); Statistics currInputStat = parentOp.getStatistics(); if (currInputStat == null) { @@ -556,15 +577,17 @@ public class ConvertJoinMapJoin implements NodeProcessor { } long inputSize = currInputStat.getDataSize(); + + boolean currentInputNotFittingInMemory = false; if ((bigInputStat == null) - || ((bigInputStat != null) && (inputSize > bigInputStat.getDataSize()))) { + || ((bigInputStat != null) && (inputSize > bigInputStat.getDataSize()))) { - if (bigTableFound) { + if (foundInputNotFittingInMemory) { // cannot convert to map join; we've already chosen a big table // on size and there's another one that's bigger. return -1; } - + if (inputSize/buckets > maxSize) { if (!bigTableCandidateSet.contains(pos)) { // can't use the current table as the big table, but it's too @@ -572,33 +595,46 @@ public class ConvertJoinMapJoin implements NodeProcessor { return -1; } - bigTableFound = true; + currentInputNotFittingInMemory = true; + foundInputNotFittingInMemory = true; } + } - if (bigInputStat != null) { - // we're replacing the current big table with a new one. Need - // to count the current one as a map table then. - totalSize += bigInputStat.getDataSize(); - } + int currentInputNumberCostlyOps = foundInputNotFittingInMemory ? + -1 : OperatorUtils.countOperatorsUpstream(parentOp, COSTLY_OPERATORS); + + // This input is the big table if it is contained in the big candidates set, and either: + // 1) we have not chosen a big table yet, or + // 2) it has been chosen as the big table above, or + // 3) the number of costly operators for this input is higher, or + // 4) the number of costly operators is equal, but the size is bigger, + boolean selectedBigTable = bigTableCandidateSet.contains(pos) && + (bigInputStat == null || currentInputNotFittingInMemory || + (!foundInputNotFittingInMemory && (currentInputNumberCostlyOps > bigInputNumberCostlyOps || + (currentInputNumberCostlyOps == bigInputNumberCostlyOps && inputSize > bigInputStat.getDataSize())))); + + if (bigInputStat != null && selectedBigTable) { + // We are replacing the current big table with a new one, thus + // we need to count the current one as a map table then. + totalSize += bigInputStat.getDataSize(); + } else if (!selectedBigTable) { + // This is not the first table and we are not using it as big table, + // in fact, we're adding this table as a map table + totalSize += inputSize; + } - if (totalSize/buckets > maxSize) { - // sum of small tables size in this join exceeds configured limit - // hence cannot convert. - return -1; - } + if (totalSize/buckets > maxSize) { + // sum of small tables size in this join exceeds configured limit + // hence cannot convert. + return -1; + } - if (bigTableCandidateSet.contains(pos)) { - bigTablePosition = pos; - bigInputStat = currInputStat; - } - } else { - totalSize += currInputStat.getDataSize(); - if (totalSize/buckets > maxSize) { - // cannot hold all map tables in memory. Cannot convert. - return -1; - } + if (selectedBigTable) { + bigTablePosition = pos; + bigInputNumberCostlyOps = currentInputNumberCostlyOps; + bigInputStat = currInputStat; } - pos++; + } return bigTablePosition; @@ -616,7 +652,6 @@ public class ConvertJoinMapJoin implements NodeProcessor { * * for tez. */ - public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, boolean removeReduceSink) throws SemanticException { // bail on mux operator because currently the mux operator masks the emit keys http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index 4699e10..1f1bf3d 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -329,8 +329,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -362,31 +361,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: llap Reducer 2 - Execution mode: llap + Execution mode: uber Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -398,12 +381,28 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -556,8 +555,7 @@ STAGE PLANS: Tez Edges: Map 1 <- Map 3 (CUSTOM_EDGE) - Map 4 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -614,31 +612,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: llap Reducer 2 - Execution mode: llap + Execution mode: uber Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -650,12 +632,28 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -871,8 +869,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -904,31 +901,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: llap Reducer 2 - Execution mode: llap + Execution mode: uber Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -936,12 +917,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -967,8 +964,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -994,31 +990,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: llap Reducer 2 - Execution mode: llap + Execution mode: uber Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1026,12 +1006,28 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 111aaaa..68d1253 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -527,8 +527,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -558,38 +557,38 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Execution mode: llap Reducer 2 - Execution mode: llap + Execution mode: uber Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -609,8 +608,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -640,43 +638,43 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col2 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: llap Reducer 2 - Execution mode: llap + Execution mode: uber Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/8e62edac/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 3ebd690..4320f01 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -4262,7 +4262,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: -- parent is reduce tasks EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY @@ -4277,9 +4277,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4290,26 +4289,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Map 3 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -4330,7 +4314,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Reducer 2 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + input vertices: + 0 Map 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 Execution mode: uber Reduce Operator Tree: Group By Operator @@ -4345,19 +4357,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4365,7 +4364,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -4812,9 +4811,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4851,13 +4849,27 @@ STAGE PLANS: 0 Map 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: '13' (type: string) - sort order: + - Map-reduce partition columns: '13' (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '13' (type: string) + 1 '13' (type: string) + input vertices: + 1 Map 4 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart_hour @@ -4866,27 +4878,13 @@ STAGE PLANS: Filter Operator predicate: (hr = 13) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '13' (type: string) - 1 '13' (type: string) - input vertices: - 0 Map 2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: '13' (type: string) + sort order: + + Map-reduce partition columns: '13' (type: string) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Reducer 4 + Reducer 3 Execution mode: uber Reduce Operator Tree: Group By Operator