Author: xuefu Date: Fri Nov 21 15:17:16 2014 New Revision: 1640934 URL: http://svn.apache.org/r1640934 Log: HIVE-8756: numRows and rawDataSize are not collected by the Spark stats [Spark Branch] (Na via Xuefu)
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1640934&r1=1640933&r2=1640934&view=diff ============================================================================== --- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original) +++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Fri Nov 21 15:17:16 2014 @@ -834,6 +834,7 @@ spark.query.files=add_part_multiple.q, \ stats_only_null.q, \ stats_partscan_1_23.q, \ stats0.q, \ + stats1.q, \ stats10.q, \ stats12.q, \ stats13.q, \ Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java?rev=1640934&r1=1640933&r2=1640934&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java Fri Nov 21 15:17:16 2014 @@ -130,6 +130,7 @@ public class GenSparkProcContext impleme public final Set<ReduceSinkOperator> clonedReduceSinks; public 
final Set<FileSinkOperator> fileSinkSet; + public final Map<FileSinkOperator, List<FileSinkOperator>> fileSinkMap; // remember which reducesinks we've already connected public final Set<ReduceSinkOperator> connectedReduceSinks; @@ -169,6 +170,7 @@ public class GenSparkProcContext impleme this.workWithUnionOperators = new LinkedHashSet<BaseWork>(); this.clonedReduceSinks = new LinkedHashSet<ReduceSinkOperator>(); this.fileSinkSet = new LinkedHashSet<FileSinkOperator>(); + this.fileSinkMap = new LinkedHashMap<FileSinkOperator, List<FileSinkOperator>>(); this.connectedReduceSinks = new LinkedHashSet<ReduceSinkOperator>(); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1640934&r1=1640933&r2=1640934&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Fri Nov 21 15:17:16 2014 @@ -25,6 +25,7 @@ import org.apache.commons.logging.LogFac import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -48,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.Re import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty; import org.apache.hadoop.hive.ql.plan.SparkWork; import org.apache.hadoop.hive.ql.plan.UnionWork; +import org.apache.hadoop.hive.ql.stats.StatsFactory; import java.util.ArrayList; import java.util.Deque; @@ -182,6 +184,15 @@ public class GenSparkUtils { context.inputs, 
partitions, root, alias, context.conf, false); } + private void collectOperators (Operator<?> op, List<Operator<?>> opList) { + opList.add(op); + for (Object child : op.getChildOperators()) { + if (child != null) { + collectOperators((Operator<?>)child, opList); + } + } + } + // removes any union operator and clones the plan public void removeUnionOperators(Configuration conf, GenSparkProcContext context, BaseWork work) @@ -196,6 +207,29 @@ public class GenSparkUtils { // need to clone the plan. List<Operator<?>> newRoots = Utilities.cloneOperatorTree(conf, roots); + // Build a map from each original FileSinkOperator to its cloned FileSinkOperators. + // This map is used later to set the stats flags on the cloned FileSinkOperators. + Iterator<Operator<?>> newRoots_it = newRoots.iterator(); + for (Operator<?> root : roots) { + Operator<?> newRoot = newRoots_it.next(); + List<Operator<?>> newOpQueue = new LinkedList<Operator<?>>(); + collectOperators (newRoot, newOpQueue); + List<Operator<?>> opQueue = new LinkedList<Operator<?>>(); + collectOperators (root, opQueue); + Iterator<Operator<?>> newOpQueue_it = newOpQueue.iterator(); + for (Operator<?> op : opQueue) { + Operator<?> newOp = newOpQueue_it.next(); + if (op instanceof FileSinkOperator) { + List<FileSinkOperator> fileSinkList = context.fileSinkMap.get((FileSinkOperator)op); + if (fileSinkList == null) { + fileSinkList = new LinkedList<FileSinkOperator>(); + } + fileSinkList.add((FileSinkOperator)newOp); + context.fileSinkMap.put((FileSinkOperator)op, fileSinkList); + } + } + } + // we're cloning the operator plan but we're retaining the original work. That means // that root operators have to be replaced with the cloned ops. The replacement map // tells you what that mapping is. 
@@ -272,8 +306,17 @@ public class GenSparkUtils { GenMapRedUtils.isInsertInto(parseContext, fileSink); HiveConf hconf = parseContext.getConf(); - boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, - hconf, fileSink, context.currentTask, isInsertTable); + boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, + hconf, fileSink, context.currentTask, isInsertTable); + // Set stats config for FileSinkOperators which are cloned from the fileSink + List<FileSinkOperator> fileSinkList = context.fileSinkMap.get(fileSink); + if (fileSinkList != null) { + for (FileSinkOperator fsOp : fileSinkList) { + fsOp.getConf().setGatherStats(fileSink.getConf().isGatherStats()); + fsOp.getConf().setStatsReliable(fileSink.getConf().isStatsReliable()); + fsOp.getConf().setMaxStatsKeyPrefixLength(fileSink.getConf().getMaxStatsKeyPrefixLength()); + } + } Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask); Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out?rev=1640934&r1=1640933&r2=1640934&view=diff ============================================================================== Files hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out (original) and hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out Fri Nov 21 15:17:16 2014 differ Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out?rev=1640934&r1=1640933&r2=1640934&view=diff ============================================================================== Files 
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out (original) and hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out Fri Nov 21 15:17:16 2014 differ Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out?rev=1640934&r1=1640933&r2=1640934&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out Fri Nov 21 15:17:16 2014 @@ -448,24 +448,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 15) (type: boolean) - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int), key (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: 
TableScan @@ -489,24 +489,24 @@ STAGE PLANS: 0 {VALUE._col0} 1 outputColumnNames: _col1 - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -676,24 +676,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t3 - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 5) (type: boolean) - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 
Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -703,24 +703,24 @@ STAGE PLANS: 0 {VALUE._col0} 1 outputColumnNames: _col1 - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -992,15 +992,15 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1010,24 +1010,24 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1206,24 +1206,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -1249,25 +1249,25 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} {VALUE._col0} 2 outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: string) Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1347,15 +1347,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: string) sort order: ++ Map-reduce partition columns: key (type: int), value (type: string) - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1365,24 +1365,24 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} 1 outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1482,15 +1482,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1502,24 +1502,24 @@ STAGE PLANS: 1 2 outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1606,12 +1606,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: 
NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1623,24 +1623,24 @@ STAGE PLANS: 1 2 outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1710,12 +1710,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -1756,24 +1756,24 @@ STAGE PLANS: 1 2 
outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1846,12 +1846,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -1892,24 +1892,24 @@ STAGE PLANS: 1 2 outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2011,12 +2011,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2028,24 +2028,24 @@ STAGE PLANS: 1 2 outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2147,12 +2147,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2164,24 +2164,24 @@ STAGE PLANS: 1 2 outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num 
rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2285,12 +2285,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2302,24 +2302,24 @@ STAGE PLANS: 1 2 outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 
Data size: 358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2438,15 +2438,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -2457,12 +2457,12 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 3 Reduce Operator Tree: @@ -2473,24 +2473,24 @@ STAGE PLANS: 0 {VALUE._col0} 1 outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2590,15 +2590,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (value is not null and 
(key > 100)) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Reducer 2 Reduce Operator Tree: Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out?rev=1640934&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out Fri Nov 21 15:17:16 2014 @@ -0,0 +1,247 @@ +PREHOOK: query: create table tmptable(key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmptable +POSTHOOK: query: create table tmptable(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmptable +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE tmptable +SELECT unionsrc.key, unionsrc.value +FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 + UNION ALL + SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE tmptable +SELECT unionsrc.key, unionsrc.value +FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 + UNION ALL + SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 
depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Union 3 <- Map 4 (NONE, 0), Reducer 2 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.tmptable + Union 3 + Vertex: Union 3 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE tmptable +SELECT unionsrc.key, unionsrc.value +FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 + UNION ALL + SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@tmptable +POSTHOOK: query: INSERT OVERWRITE TABLE tmptable +SELECT unionsrc.key, unionsrc.value +FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 + UNION ALL + SELECT s2.key AS key, s2.value AS value FROM src1 s2) unionsrc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@tmptable +POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value +PREHOOK: type: QUERY +PREHOOK: Input: default@tmptable +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM tmptable x SORT BY x.key, x.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmptable +#### A masked pattern was here #### + + + + + val_165 + val_193 + val_265 + val_27 + val_409 + val_484 +128 +146 val_146 +150 val_150 +213 val_213 +224 +238 val_238 +255 val_255 +273 val_273 +278 val_278 +311 val_311 +369 +401 val_401 +406 val_406 +66 val_66 +98 val_98 +tst1 500 +PREHOOK: query: DESCRIBE FORMATTED tmptable +PREHOOK: type: 
DESCTABLE +PREHOOK: Input: default@tmptable +POSTHOOK: query: DESCRIBE FORMATTED tmptable +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tmptable +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 26 + rawDataSize 199 + totalSize 225 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Load a file into a existing table +-- Some stats (numFiles, totalSize) should be updated correctly +-- Some other stats (numRows, rawDataSize) should be cleared +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE tmptable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmptable +POSTHOOK: query: -- Load a file into a existing table +-- Some stats (numFiles, totalSize) should be updated correctly +-- Some other stats (numRows, rawDataSize) should be cleared +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE tmptable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmptable +PREHOOK: query: DESCRIBE FORMATTED tmptable +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tmptable +POSTHOOK: query: DESCRIBE FORMATTED tmptable +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tmptable +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked 
pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 0 + rawDataSize 0 + totalSize 1583 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1