Repository: hive
Updated Branches:
  refs/heads/master 26d6de7e2 -> b6c15bc72
HIVE-17073: Incorrect result with vectorization and SharedWorkOptimizer (Jesus Camacho Rodriguez, reviewed by Matt McCline) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b6c15bc7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b6c15bc7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b6c15bc7 Branch: refs/heads/master Commit: b6c15bc72206b4c79c93c9c620e28561ca4b9e92 Parents: 26d6de7 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Tue Jul 11 18:23:31 2017 +0100 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Wed Jul 12 17:36:31 2017 +0200 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 3 +- .../apache/hadoop/hive/ql/exec/Operator.java | 58 ++++++ .../hadoop/hive/ql/exec/TableScanOperator.java | 11 +- .../vector/VectorAppMasterEventOperator.java | 3 +- .../ql/exec/vector/VectorFilterOperator.java | 2 +- .../ql/exec/vector/VectorGroupByOperator.java | 4 +- .../ql/exec/vector/VectorLimitOperator.java | 2 +- .../exec/vector/VectorMapJoinBaseOperator.java | 3 +- .../exec/vector/VectorSMBMapJoinOperator.java | 2 +- .../ql/exec/vector/VectorSelectOperator.java | 4 +- .../VectorMapJoinGenerateResultOperator.java | 6 +- .../hive/ql/optimizer/physical/Vectorizer.java | 6 + .../hadoop/hive/ql/plan/TableScanDesc.java | 10 + .../exec/vector/TestVectorSelectOperator.java | 3 +- .../vectorized_multi_output_select.q | 28 +++ .../llap/vectorized_multi_output_select.q.out | 201 +++++++++++++++++++ 16 files changed, 331 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties index 409fc90..1cc0104 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -741,7 +741,8 @@ minillaplocal.query.files=acid_globallimit.q,\ smb_mapjoin_17.q,\ groupby_resolution.q,\ windowing_windowspec2.q,\ - vectorized_join46.q + vectorized_join46.q,\ + vectorized_multi_output_select.q encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_insert_partition_static.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 3656842..7f646c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -110,6 +111,11 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C private boolean useBucketizedHiveInputFormat; + // Data structures specific for vectorized operators. + private int size; + private boolean selectedInUse; + private int[] selected; + // dummy operator (for not increasing seqId) protected Operator(String name, CompilationOpContext cContext) { this(); @@ -122,6 +128,8 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C childOperators = new ArrayList<Operator<? 
extends OperatorDesc>>(); parentOperators = new ArrayList<Operator<? extends OperatorDesc>>(); abortOp = new AtomicBoolean(false); + // Initializing data structures for vectorization + selected = new int[VectorizedRowBatch.DEFAULT_SIZE]; } public Operator(CompilationOpContext cContext) { @@ -880,6 +888,56 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { + forward(row, rowInspector, false); + } + + protected void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) + throws HiveException { + if (isVectorized && getNumChild() > 1) { + vectorForward((VectorizedRowBatch) row, rowInspector); + return; + } + baseForward(row, rowInspector); + } + + private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) + throws HiveException { + runTimeNumRows++; + if (getDone()) { + return; + } + + // Data structures to store original values + size = vrg.size; + selectedInUse = vrg.selectedInUse; + if (vrg.selectedInUse) { + System.arraycopy(vrg.selected, 0, selected, 0, size); + } + + int childrenDone = 0; + for (int i = 0; i < childOperatorsArray.length; i++) { + Operator<? 
extends OperatorDesc> o = childOperatorsArray[i]; + if (o.getDone()) { + childrenDone++; + } else { + o.process(vrg, childOperatorsTag[i]); + // Restore original values + vrg.size = size; + vrg.selectedInUse = selectedInUse; + if (vrg.selectedInUse) { + System.arraycopy(selected, 0, vrg.selected, 0, size); + } + } + } + + // if all children are done, this operator is also done + if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { + setDone(true); + } + } + + private void baseForward(Object row, ObjectInspector rowInspector) + throws HiveException { runTimeNumRows++; if (getDone()) { return; http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index 17f2efb..ffedefe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; import org.apache.hadoop.hive.ql.stats.StatsPublisher; @@ -69,6 +70,8 @@ public class TableScanOperator extends Operator<TableScanDesc> implements // insiderView will tell this TableScan is inside a view or not. 
private transient boolean insideView; + private transient boolean vectorized; + private String defaultPartitionName; /** @@ -110,6 +113,10 @@ public class TableScanOperator extends Operator<TableScanDesc> implements public void process(Object row, int tag) throws HiveException { if (rowLimit >= 0) { if (row instanceof VectorizedRowBatch) { + // We need to check with 'instanceof' instead of just checking + // vectorized because the row can be a VectorizedRowBatch when + // FetchOptimizer kicks in even if the operator pipeline is not + // vectorized VectorizedRowBatch batch = (VectorizedRowBatch) row; if (currCount >= rowLimit) { setDone(true); @@ -127,7 +134,7 @@ public class TableScanOperator extends Operator<TableScanDesc> implements if (conf != null && conf.isGatherStats()) { gatherStats(row); } - forward(row, inputObjInspectors[tag]); + forward(row, inputObjInspectors[tag], vectorized); } // Change the table partition for collecting stats @@ -258,6 +265,8 @@ public class TableScanOperator extends Operator<TableScanDesc> implements defaultPartitionName = HiveConf.getVar(hconf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); currentStat = null; stats = new HashMap<String, Stat>(); + + vectorized = conf.isVectorized(); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index 2bf6ac5..2c433f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -130,6 +130,7 @@ public class VectorAppMasterEventOperator extends AppMasterEventOperator { throw new HiveException(e); } - forward(data, 
rowInspector); + forward(data, rowInspector, true); } + } http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index fd885a9..fdd5aab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -120,7 +120,7 @@ public class VectorFilterOperator extends FilterOperator { // All are selected, do nothing } if (vrg.size > 0) { - forward(vrg, null); + forward(vrg, null, true); } // Restore the original selected vector http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 642dd46..613a31a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -1057,7 +1057,7 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements for (int i = 0; i < aggregators.length; ++i) { forwardCache[fi++] = aggregators[i].evaluateOutput(agg.getAggregationBuffer(i)); } - forward(forwardCache, outputObjInspector); + forward(forwardCache, outputObjInspector, false); } else { // Output keys and aggregates into the output batch. 
for (int i = 0; i < outputKeyLength; ++i) { @@ -1097,7 +1097,7 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> implements } private void flushOutput() throws HiveException { - forward(outputBatch, null); + forward(outputBatch, null, true); outputBatch.reset(); } http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index ea00af3..b37dd05 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -75,7 +75,7 @@ public class VectorLimitOperator extends LimitOperator { batch.selected[i] = batch.selected[skipSize + i]; } } - forward(row, inputObjInspectors[tag]); + forward(row, inputObjInspectors[tag], true); currCount += batch.size; } } http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index bcde25f..b2c8684 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -126,7 +126,7 @@ public class VectorMapJoinBaseOperator extends MapJoinOperator implements Vector } private void flushOutput() throws HiveException { - forward(outputBatch, null); + forward(outputBatch, null, true); outputBatch.reset(); } @@ -185,4 +185,5 @@ public class VectorMapJoinBaseOperator extends 
MapJoinOperator implements Vector public VectorizationContext getOuputVectorizationContext() { return vOutContext; } + } http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index f8c4223..0473f14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -307,7 +307,7 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements Vect } private void flushOutput() throws HiveException { - forward(outputBatch, null); + forward(outputBatch, null, true); outputBatch.reset(); } http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 5c490ef..17ccf21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -115,7 +115,7 @@ public class VectorSelectOperator extends Operator<SelectDesc> implements // Just forward the row as is if (conf.isSelStarNoCompute()) { - forward(row, inputObjInspectors[tag]); + forward(row, inputObjInspectors[tag], true); return; } @@ -134,7 +134,7 @@ public class VectorSelectOperator extends Operator<SelectDesc> implements int originalProjectionSize = vrg.projectionSize; vrg.projectionSize = projectedOutputColumns.length; vrg.projectedColumns = 
this.projectedOutputColumns; - forward(vrg, outputObjInspector); + forward(vrg, outputObjInspector, true); // Revert the projected columns back, because vrg will be re-used. vrg.projectionSize = originalProjectionSize; http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 1c20d93..bab5ee4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -635,7 +635,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC batch.projectionSize = outputProjection.length; batch.projectedColumns = outputProjection; - forward(batch, null); + forward(batch, null, true); // Revert the projected columns back, because batch can be re-used by our parent operators. batch.projectionSize = originalProjectionSize; @@ -647,7 +647,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC * Forward the overflow batch and reset the batch. */ protected void forwardOverflow() throws HiveException { - forward(overflowBatch, null); + forward(overflowBatch, null, true); overflowBatch.reset(); maybeCheckInterrupt(); } @@ -664,7 +664,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC * Forward the overflow batch, but do not reset the batch. 
*/ private void forwardOverflowNoReset() throws HiveException { - forward(overflowBatch, null); + forward(overflowBatch, null, true); } /* http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 933e47d..5f442a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1107,6 +1107,12 @@ public class Vectorizer implements PhysicalPlanResolver { HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>(); ogw.startWalking(topNodes, nodeOutput); + for (Node topNode : topNodes) { + if (topNode instanceof TableScanOperator) { + ((TableScanOperator) topNode).getConf().setVectorized(true); + } + } + vectorTaskColumnInfo.setScratchTypeNameArray(vnp.getVectorScratchColumnTypeNames()); vectorTaskColumnInfo.transferToBaseWork(mapWork); http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 570bd6b..d1c8690 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -102,6 +102,8 @@ public class TableScanDesc extends AbstractOperatorDesc { private boolean isAcidTable; + private boolean vectorized; + private AcidUtils.AcidOperationalProperties acidOperationalProperties = null; private transient TableSample tableSample; @@ -444,4 +446,12 @@ public class TableScanDesc extends AbstractOperatorDesc { } 
return new TableScanOperatorExplainVectorization(this, vectorDesc); } + + public void setVectorized(boolean vectorized) { + this.vectorized = vectorized; + } + + public boolean isVectorized() { + return vectorized; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 614b1d1..71da542 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -61,7 +61,8 @@ public class TestVectorSelectOperator { * Override forward to do validation */ @Override - public void forward(Object row, ObjectInspector rowInspector) throws HiveException { + public void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) + throws HiveException { VectorizedRowBatch vrg = (VectorizedRowBatch) row; int[] projections = vrg.projectedColumns; http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q b/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q new file mode 100644 index 0000000..e768a5d --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q @@ -0,0 +1,28 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=3000; +set hive.strict.checks.cartesian.product=false; +set hive.merge.nway.joins=false; +set hive.vectorized.execution.enabled=true; + +explain +select * from ( + select count(*) as h8_30_to_9 + from src + join 
src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2; + +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2; http://git-wip-us.apache.org/repos/asf/hive/blob/b6c15bc7/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out new file mode 100644 index 0000000..f744eb6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out @@ -0,0 +1,201 @@ +Warning: Map Join MAPJOIN[43][bigTable=?] 
in task 'Reducer 2' is a cross product +PREHOOK: query: explain +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value = 'val_278') and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value = 'val_255') and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[43][bigTable=?] 
in task 'Reducer 2' is a cross product +PREHOOK: query: select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +2 2