Author: gunther Date: Fri Feb 13 08:15:57 2015 New Revision: 1659464 URL: http://svn.apache.org/r1659464 Log: HIVE-9635: LLAP: I'm the decider (Gunther Hagleitner)
Added: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out Modified: hive/branches/llap/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java hive/branches/llap/data/conf/tez/hive-site.xml hive/branches/llap/itests/src/test/resources/testconfiguration.properties hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Modified: hive/branches/llap/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java URL: http://svn.apache.org/viewvc/hive/branches/llap/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== --- hive/branches/llap/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original) +++ hive/branches/llap/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Feb 13 08:15:57 2015 @@ -2011,6 +2011,20 @@ public class HiveConf extends Configurat LLAP_ORC_CACHE_MAX_SIZE("hive.llap.io.cache.orc.size", 1024L * 1024 * 1024, ""), LLAP_USE_LRFU("hive.llap.io.use.lrfu", false, ""), LLAP_LRFU_LAMBDA("hive.llap.io.lrfu.lambda", 0.01f, ""), + LLAP_AUTO_ENFORCE_TREE("hive.llap.auto.enforce.tree", true, + "Enforce that all parents are in llap, before considering vertex"), + LLAP_AUTO_ENFORCE_VECTORIZED("hive.llap.auto.enforce.vectorized", true, + "Enforce that inputs are vectorized, before considering vertex"), + LLAP_AUTO_ENFORCE_STATS("hive.llap.auto.enforce.stats", true, + "Enforce that col stats are available, before considering vertex"), + LLAP_AUTO_MAX_INPUT("hive.llap.auto.max.input.size", 
10*1024*1024*1024L, + "Check input size, before considering vertex (-1 disables check)"), + LLAP_AUTO_MAX_OUTPUT("hive.llap.auto.max.output.size", 1*1024*1024*1024L, + "Check output size, before considering vertex (-1 disables check)"), + LLAP_EXECUTION_MODE("hive.llap.execution.mode", "auto", + new StringSet("auto", "none", "all", "map"), + "Chooses whether query fragments will run in container or in llap"), + SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout", "60s", new TimeValidator(TimeUnit.SECONDS), @@ -2034,7 +2048,8 @@ public class HiveConf extends Configurat SPARK_RPC_CHANNEL_LOG_LEVEL("hive.spark.client.channel.log.level", null, "Channel logging level for remote Spark driver. One of {DEBUG, ERROR, INFO, TRACE, WARN}."), SPARK_RPC_SASL_MECHANISM("hive.spark.client.rpc.sasl.mechanisms", "DIGEST-MD5", - "Name of the SASL mechanism to use for authentication."); ; + "Name of the SASL mechanism to use for authentication.") + ; public final String varname; private final String defaultExpr; Modified: hive/branches/llap/data/conf/tez/hive-site.xml URL: http://svn.apache.org/viewvc/hive/branches/llap/data/conf/tez/hive-site.xml?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== Binary files - no diff available. 
Modified: hive/branches/llap/itests/src/test/resources/testconfiguration.properties URL: http://svn.apache.org/viewvc/hive/branches/llap/itests/src/test/resources/testconfiguration.properties?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== --- hive/branches/llap/itests/src/test/resources/testconfiguration.properties (original) +++ hive/branches/llap/itests/src/test/resources/testconfiguration.properties Fri Feb 13 08:15:57 2015 @@ -285,7 +285,8 @@ minitez.query.files=bucket_map_join_tez1 dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ mapjoin_decimal.q,\ - lvj_mapjoin.q, \ + lvj_mapjoin.q,\ + llapdecider.q,\ mrr.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ Added: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java?rev=1659464&view=auto ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java (added) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java Fri Feb 13 08:15:57 2015 @@ -0,0 +1,318 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TezWork; + +import static 
org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode.*; + +/** + * LlapDecider takes care of tagging certain vertices in the execution + * graph as "llap", which in turn causes them to be submitted to an + * llap daemon instead of a regular yarn container. + * + * The actual algorithm used is driven by LLAP_EXECUTION_MODE. "all", + * "none" and "map" mechanically tag those elements. "auto" tries to + * be smarter by looking for suitable vertices. + * + * Regardless of the algorithm used, it's always ensured that there's + * no user code that will be sent to the daemon (i.e.: script + * operators, temporary functions, etc) + */ +public class LlapDecider implements PhysicalPlanResolver { + + protected static transient final Log LOG + = LogFactory.getLog(LlapDecider.class); + + private PhysicalContext physicalContext; + + private HiveConf conf; + + public enum LlapMode { + map, // map operators only + all, // all operators + none, // no operators + auto // please hive, choose for me + } + + private LlapMode mode; + + class LlapDecisionDispatcher implements Dispatcher { + + private PhysicalContext pctx; + private HiveConf conf; + + public LlapDecisionDispatcher(PhysicalContext pctx) { + this.pctx = pctx; + this.conf = pctx.getConf(); + } + + @Override + public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) + throws SemanticException { + Task<? extends Serializable> currTask = (Task<? 
extends Serializable>) nd; + if (currTask instanceof TezTask) { + TezWork work = ((TezTask) currTask).getWork(); + for (BaseWork w: work.getAllWork()) { + handleWork(work, w); + } + } + return null; + } + + private void handleWork(TezWork tezWork, BaseWork work) + throws SemanticException { + if (evaluateWork(tezWork, work)) { + convertWork(tezWork, work); + } + } + + private void convertWork(TezWork tezWork, BaseWork work) + throws SemanticException { + work.setLlapMode(true); + } + + private boolean evaluateWork(TezWork tezWork, BaseWork work) + throws SemanticException { + + LOG.info("Evaluating work item: " + work.getName()); + + // no means no + if (mode == none) { + return false; + } + + // first we check if we *can* run in llap. If we need to use + // user code to do so (script/udf) we don't. + if (!evaluateOperators(work)) { + LOG.info("some operators cannot be run in llap"); + return false; + } + + // --- From here on out we choose whether we *want* to run in llap + + // if mode is all just run it + if (mode == all) { + return true; + } + + // if map mode run iff work is map work + if (mode == map) { + return work instanceof MapWork; + } + + // --- From here we evaluate the auto mode + assert mode == auto; + + // if parents aren't in llap neither should the child + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_AUTO_ENFORCE_TREE) + && !checkParentsInLlap(tezWork, work)) { + LOG.info("Parent not in llap."); + return false; + } + + // only vectorized orc input is cached. so there's a reason to + // limit to that for now. 
+ if (work instanceof MapWork + && HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_AUTO_ENFORCE_VECTORIZED) + && !checkInputsVectorized((MapWork) work)) { + LOG.info("Inputs not vectorized."); + return false; + } + + // check if there's at least some degree of stats available + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_AUTO_ENFORCE_STATS) + && !checkPartialStatsAvailable(work)) { + LOG.info("No column stats available."); + return false; + } + + // now let's take a look at input sizes + long maxInput = HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_AUTO_MAX_INPUT); + long expectedInput = computeInputSize(work); + if (maxInput >= 0 && (expectedInput > maxInput)) { + LOG.info(String.format("Inputs too big (%d > %d)", expectedInput, maxInput)); + return false; + } + + // and finally let's check output sizes + long maxOutput = HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_AUTO_MAX_OUTPUT); + long expectedOutput = computeOutputSize(work); + if (maxOutput >= 0 && (expectedOutput > maxOutput)) { + LOG.info(String.format("Outputs too big (%d > %d)", expectedOutput, maxOutput)); + return false; + } + + // couldn't convince you otherwise? well then let's llap. + return true; + } + + private Map<Rule, NodeProcessor> getRules() { + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("No scripts", ScriptOperator.getOperatorName() + ".*"), + new NodeProcessor() { + public Object process(Node n, Stack<Node> s, NodeProcessorCtx c, + Object... os) { + return new Boolean(false); + } + }); + return opRules; + } + + private boolean evaluateOperators(BaseWork work) throws SemanticException { + // lets take a look at the operators. we're checking for user + // code in those. we will not run that in llap. 
+ Dispatcher disp = new DefaultRuleDispatcher(null, getRules(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(work.getAllRootOperators()); + + HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>(); + ogw.startWalking(topNodes, nodeOutput); + + for (Node n : nodeOutput.keySet()) { + if (nodeOutput.get(n) != null) { + if (!((Boolean)nodeOutput.get(n))) { + return false; + } + } + } + return true; + } + + private boolean checkParentsInLlap(TezWork tezWork, BaseWork base) { + for (BaseWork w: tezWork.getParents(base)) { + if (!w.getLlapMode()) { + LOG.info("Not all parents are run in llap"); + return false; + } + } + return true; + } + + private boolean checkInputsVectorized(MapWork mapWork) { + for (String path : mapWork.getPathToPartitionInfo().keySet()) { + PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path); + List<Class<?>> interfaceList = + Arrays.asList(pd.getInputFileFormatClass().getInterfaces()); + if (!interfaceList.contains(VectorizedInputFormatInterface.class)) { + LOG.info("Input format: " + pd.getInputFileFormatClassName() + + ", doesn't provide vectorized input"); + return false; + } + } + return true; + } + + private boolean checkPartialStatsAvailable(BaseWork base) { + for (Operator<?> o: base.getAllRootOperators()) { + if (o.getStatistics().getColumnStatsState() == Statistics.State.NONE) { + return false; + } + } + return true; + } + + private long computeEdgeSize(BaseWork base, boolean input) { + long size = 0; + for (Operator<?> o: (input ? 
base.getAllRootOperators() : base.getAllLeafOperators())) { + if (o.getStatistics() == null) { + // return worst case if unknown + return Long.MAX_VALUE; + } + + long currSize = o.getStatistics().getDataSize(); + if ((currSize < 0) || ((Long.MAX_VALUE - size) < currSize)) { + // overflow + return Long.MAX_VALUE; + } + size += currSize; + } + return size; + } + + private long computeInputSize(BaseWork base) { + return computeEdgeSize(base, true); + } + + private long computeOutputSize(BaseWork base) { + return computeEdgeSize(base, false); + } + } + + @Override + public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { + + this.physicalContext = pctx; + this.conf = pctx.getConf(); + + this.mode = LlapMode.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_EXECUTION_MODE)); + LOG.info("llap mode: "+this.mode); + + if (mode == none) { + LOG.info("LLAP disabled."); + return pctx; + } + + // create dispatcher and graph walker + Dispatcher disp = new LlapDecisionDispatcher(pctx); + TaskGraphWalker ogw = new TaskGraphWalker(disp); + + // get all the tasks nodes from root task + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getRootTasks()); + + // begin to walk through the task tree. 
+ ogw.startWalking(topNodes, null); + return pctx; + } +} Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Fri Feb 13 08:15:57 2015 @@ -70,6 +70,7 @@ import org.apache.hadoop.hive.ql.optimiz import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; +import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider; import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; @@ -475,6 +476,12 @@ public class TezCompiler extends TaskCom } else { LOG.debug("Skipping stage id rearranger"); } + + if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) { + physicalCtx = new LlapDecider().resolve(physicalCtx); + } else { + LOG.debug("Skipping llap decider"); + } return; } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Fri Feb 13 08:15:57 2015 @@ -60,6 
+60,8 @@ public abstract class BaseWork extends A // Vectorization. protected Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = null; protected Map<String, Map<String, Integer>> allColumnVectorMaps = null; + + protected boolean llapMode = false; protected boolean vectorMode = false; public void setGatheringStats(boolean gatherStats) { @@ -184,6 +186,14 @@ public abstract class BaseWork extends A return vectorMode; } + public void setLlapMode(boolean llapMode) { + this.llapMode = llapMode; + } + + public boolean getLlapMode() { + return llapMode; + } + public abstract void configureJobConf(JobConf job); public void setTag(int tag) { Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Fri Feb 13 08:15:57 2015 @@ -313,8 +313,17 @@ public class MapWork extends BaseWork { } @Explain(displayName = "Execution mode") - public String getVectorModeOn() { - return vectorMode ? 
"vectorized" : null; + public String getExecutionMode() { + if (vectorMode) { + if (llapMode) { + return "vectorized, llap"; + } else { + return "vectorized"; + } + } else if (llapMode) { + return "llap"; + } + return null; } @Override Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1659464&r1=1659463&r2=1659464&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Fri Feb 13 08:15:57 2015 @@ -149,8 +149,17 @@ public class ReduceWork extends BaseWork } @Explain(displayName = "Execution mode") - public String getVectorModeOn() { - return vectorMode ? "vectorized" : null; + public String getExecutionMode() { + if (vectorMode) { + if (llapMode) { + return "vectorized, llap"; + } else { + return "vectorized"; + } + } else if (llapMode) { + return "llap"; + } + return null; } @Explain(displayName = "Reduce Operator Tree") Added: hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q?rev=1659464&view=auto ============================================================================== --- hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q (added) +++ hive/branches/llap/ql/src/test/queries/clientpositive/llapdecider.q Fri Feb 13 08:15:57 2015 @@ -0,0 +1,51 @@ +set hive.stats.fetch.column.stats=true; + +-- simple query with multiple reduce stages +EXPLAIN SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt; + +create table src_orc stored as orc as select * from src; + +EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt; + +set 
hive.llap.auto.enforce.stats=false; + +EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt; + +set hive.llap.auto.enforce.stats=true; + +analyze table src_orc compute statistics for columns; + +EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt; + +EXPLAIN SELECT * from src_orc join src on (src_orc.key = src.key) order by src.value; + +EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value; + +set hive.llap.auto.enforce.tree=false; + +EXPLAIN SELECT * from src_orc join src on (src_orc.key = src.key) order by src.value; + +set hive.llap.auto.enforce.tree=true; + +set hive.llap.auto.max.input.size=10; + +EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value; + +set hive.llap.auto.max.input.size=1000000000; +set hive.llap.auto.max.output.size=10; + +EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value; + +set hive.llap.auto.max.output.size=1000000000; + +set hive.llap.execution.mode=map; + +EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value; + +set hive.llap.execution.mode=none; + +EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value; + +set hive.llap.execution.mode=all; + +EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value; Added: hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out?rev=1659464&view=auto ============================================================================== --- hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out (added) +++ hive/branches/llap/ql/src/test/results/clientpositive/tez/llapdecider.q.out Fri Feb 13 08:15:57 2015 @@ -0,0 +1,1013 @@ +PREHOOK: query: -- simple query with multiple reduce stages +EXPLAIN SELECT key, 
count(value) as cnt FROM src GROUP BY key ORDER BY cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- simple query with multiple reduce stages +EXPLAIN SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 
19475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table src_orc stored as orc as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc +POSTHOOK: query: create table src_orc stored as orc as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc +PREHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table src_orc compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### 
+POSTHOOK: query: analyze table src_orc compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 
(type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc join src on (src_orc.key = src.key) order by src.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc join src on (src_orc.key = src.key) order by src.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + 
Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc join src on (src_orc.key = src.key) order by src.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc join src on (src_orc.key = src.key) order by src.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: 
COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat 
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 
(SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * from src_orc s1 join src_orc s2 on (s1.key = s2.key) order by s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Statistics: Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1219 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +