[03/50] [abbrv] hive git commit: HIVE-17538 : Enhance estimation of stats to estimate even if only one column is missing stats (Vineet Garg, reviewed by Ashutosh Chauhan)

sershe Wed, 27 Sep 2017 13:23:04 -0700

HIVE-17538 : Enhance estimation of stats to estimate even if only one column is 
missing stats (Vineet Garg, reviewed by Ashutosh Chauhan)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5d5b63dc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5d5b63dc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5d5b63dc

Branch: refs/heads/hive-14535
Commit: 5d5b63dc458f60d4afe80a9596946bc19f8a048f
Parents: bb26d03
Author: Vineet Garg <[email protected]>
Authored: Mon Sep 25 13:57:55 2017 -0700
Committer: Vineet Garg <[email protected]>
Committed: Mon Sep 25 13:57:55 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java | 38 +++++++---
 .../clientpositive/annotate_stats_groupby.q.out | 22 +++---
 .../annotate_stats_groupby2.q.out               | 12 ++--
 .../annotate_stats_join_pkfk.q.out              | 12 ++--
 .../clientpositive/annotate_stats_select.q.out  |  8 +--
 .../clientpositive/annotate_stats_table.q.out   |  4 +-
 .../cbo_rp_annotate_stats_groupby.q.out         | 22 +++---
 .../clientpositive/filter_join_breaktask.q.out  | 60 ++++++++--------
 .../clientpositive/llap/explainuser_2.q.out     | 58 +++++++--------
 .../llap/filter_join_breaktask.q.out            | 76 ++++++++++----------
 .../clientpositive/ppd_repeated_alias.q.out     | 52 +++++++-------
 .../spark/filter_join_breaktask.q.out           | 48 ++++++-------
 .../clientpositive/tez/explainanalyze_2.q.out   | 58 +++++++--------
 13 files changed, 250 insertions(+), 220 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 17d9f2d..778c918 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -35,6 +35,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import com.google.common.collect.Sets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -277,7 +278,28 @@ public class StatsUtils {
     }
   }
 
-  private static long getNumRows(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns, Table table, long ds) {
+  private static void estimateStatsForMissingCols(List<String> neededColumns, List<ColStatistics> columnStats,
+                                           Table table, HiveConf conf, long nr, List<ColumnInfo> schema) {
+
+    Set<String> neededCols = new HashSet<>(neededColumns);
+    Set<String> colsWithStats = new HashSet<>();
+
+    for (ColStatistics cstats : columnStats) {
+      colsWithStats.add(cstats.getColumnName());
+    }
+
+    List<String> missingColStats = new ArrayList<String>(Sets.difference(neededCols, colsWithStats));
+
+    if(missingColStats.size() > 0) {
+      List<ColStatistics> estimatedColStats = estimateStats(table, schema, missingColStats, conf, nr);
+      for (ColStatistics estColStats : estimatedColStats) {
+        columnStats.add(estColStats);
+      }
+    }
+  }
+
+  private static long getNumRows(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns,
+                                 Table table, long ds) {
     long nr = getNumRows(table);
     // number of rows -1 means that statistics from metastore is not reliable
     // and 0 means statistics gathering is disabled
@@ -322,9 +344,13 @@ public class StatsUtils {
       List<ColStatistics> colStats = Lists.newArrayList();
       if (fetchColStats) {
         colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache);
-        if(colStats == null || colStats.size() < 1) {
-          colStats = estimateStats(table,schema,neededColumns, conf, nr);
+        if(colStats == null) {
+          colStats = Lists.newArrayList();
         }
+        estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema);
+
+        // we should have stats for all columns (estimated or actual)
+        assert(neededColumns.size() == colStats.size());
         long betterDS = getDataSizeFromColumnStats(nr, colStats);
         ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS;
       }
@@ -457,15 +483,11 @@ public class StatsUtils {
             aggrStats.getColStats() != null && aggrStats.getColStatsSize() != 0;
         if (neededColumns.size() == 0 ||
             (neededColsToRetrieve.size() > 0 && !statsRetrieved)) {
+          estimateStatsForMissingCols(neededColsToRetrieve, columnStats, table, conf, nr, schema);
           // There are some partitions with no state (or we didn't fetch any state).
           // Update the stats with empty list to reflect that in the
           // state/initialize structures.
 
-          if(columnStats.isEmpty()) {
-            // estimate stats
-            columnStats = estimateStats(table, schema, neededColumns, conf, nr);
-          }
-
           // add partition column stats
           addPartitionColumnStats(conf, partitionColsToRetrieve, schema, table, partList, columnStats);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index f9a1eb8..cd4b0ad 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -106,22 +106,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
-              Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: PARTIAL
               Group By Operator
                 aggregations: count()
                 keys: state (type: string), locid (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE 
Column stats: PARTIAL
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
-                  Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE 
Column stats: PARTIAL
                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -129,7 +129,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column 
stats: PARTIAL
           Group By Operator
             aggregations: min(_col1)
             keys: _col0 (type: string), _col2 (type: bigint)
@@ -743,30 +743,30 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), zip (type: bigint)
               outputColumnNames: state, zip
-              Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
               Group By Operator
                 keys: state (type: string), zip (type: bigint)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE 
Column stats: PARTIAL
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: bigint)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
-                  Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE 
Column stats: PARTIAL
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column 
stats: PARTIAL
           File Output Operator
             compressed: false
-            Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE 
Column stats: PARTIAL
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
index 31c4ed1..4986879 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
@@ -196,30 +196,30 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: location
-            Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE 
Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), votes (type: bigint)
               outputColumnNames: state, votes
-              Statistics: Num rows: 20 Data size: 1720 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE 
Column stats: PARTIAL
               Group By Operator
                 keys: state (type: string), votes (type: bigint)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE 
Column stats: PARTIAL
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: bigint)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
-                  Statistics: Num rows: 10 Data size: 860 Basic stats: 
COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 10 Data size: 940 Basic stats: 
COMPLETE Column stats: PARTIAL
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column 
stats: PARTIAL
           File Output Operator
             compressed: false
-            Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE 
Column stats: PARTIAL
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
index cdb9cea..a73e34d 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
@@ -559,19 +559,19 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: s
-            Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE 
Column stats: PARTIAL
             Filter Operator
               predicate: ((s_company_id > 0) and s_store_sk is not null) 
(type: boolean)
-              Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE 
Column stats: PARTIAL
               Select Operator
                 expressions: s_store_sk (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE 
Column stats: PARTIAL
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE 
Column stats: PARTIAL
           TableScan
             alias: ss
             Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE 
Column stats: COMPLETE
@@ -595,10 +595,10 @@ STAGE PLANS:
             0 _col0 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0
-          Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: PARTIAL
           File Output Operator
             compressed: false
-            Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE 
Column stats: PARTIAL
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
index dec7f40..7f5b832 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
@@ -132,11 +132,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypes_orc
-          Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: 
smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: 
double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: 
timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: 
map<string,string>), l1 (type: array<int>), st1 (type: struct<c1:int,c2:string>)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-            Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: explain select bo1 from alltypes_orc
@@ -670,11 +670,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: alltypes_orc
-          Statistics: Num rows: 2 Data size: 420 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 2 Data size: 804 Basic stats: COMPLETE Column 
stats: PARTIAL
           Select Operator
             expressions: bo1 (type: boolean), ti1 (type: tinyint), si1 (type: 
smallint), i1 (type: int), bi1 (type: bigint), f1 (type: float), d1 (type: 
double), de1 (type: decimal(10,0)), ts1 (type: timestamp), da1 (type: 
timestamp), s1 (type: string), vc1 (type: varchar(5)), m1 (type: 
map<string,string>), l1 (type: array<int>), st1 (type: 
struct<c1:int,c2:string>), 11 (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
-            Statistics: Num rows: 2 Data size: 428 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: explain select i1 from (select i1 from alltypes_orc limit 10) 
temp

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/annotate_stats_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
index 5d443f1..ff7b403 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
@@ -139,11 +139,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: emp_orc
-          Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE 
Column stats: PARTIAL
           Select Operator
             expressions: lastname (type: string), deptid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 48 Data size: 192 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 48 Data size: 8840 Basic stats: COMPLETE 
Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: explain select deptid from emp_orc

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out 
b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index 88b5d84..a603cc6 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -106,22 +106,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: sq1:loc_orc
-            Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
-              Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: PARTIAL
               Group By Operator
                 aggregations: count()
                 keys: state (type: string), locid (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE 
Column stats: PARTIAL
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
-                  Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE 
Column stats: PARTIAL
                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -129,7 +129,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: state, locid, $f2
-          Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column 
stats: PARTIAL
           Group By Operator
             aggregations: min(locid)
             keys: state (type: string), $f2 (type: bigint)
@@ -767,30 +767,30 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), zip (type: bigint)
               outputColumnNames: state, zip
-              Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE 
Column stats: PARTIAL
               Group By Operator
                 keys: state (type: string), zip (type: bigint)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE 
Column stats: PARTIAL
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: bigint)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
-                  Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+                  Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE 
Column stats: PARTIAL
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
           mode: mergepartial
           outputColumnNames: state, zip
-          Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column 
stats: PARTIAL
           File Output Operator
             compressed: false
-            Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE 
Column stats: PARTIAL
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out 
b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
index 8f9b636..9d00ce6 100644
--- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
+++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out
@@ -38,22 +38,22 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: f
+            alias: g
             Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: key is not null (type: boolean)
+              predicate: (value <> '') (type: boolean)
               Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: key (type: int)
+                expressions: value (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int)
+                  key expressions: _col0 (type: string)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 0
                   auto parallelism: false
@@ -70,13 +70,13 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int)
+                  key expressions: _col1 (type: string)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
+                  Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 1
-                  value expressions: _col1 (type: string)
+                  value expressions: _col0 (type: int)
                   auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -129,16 +129,16 @@ STAGE PLANS:
               name: default.filter_join_breaktask
             name: default.filter_join_breaktask
       Truncated Path -> Alias:
-        /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m]
+        /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g, $hdt$_1:m]
       Needs Tagging: true
       Reduce Operator Tree:
         Join Operator
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col0 (type: int)
-            1 _col0 (type: int)
-          outputColumnNames: _col0, _col3
+            0 _col0 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col0, _col2
           Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
@@ -150,8 +150,8 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 properties:
                   column.name.delimiter ,
-                  columns _col0,_col3
-                  columns.types int,string
+                  columns _col0,_col2
+                  columns.types string,int
                   escape.delim \
                   serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -165,31 +165,31 @@ STAGE PLANS:
           TableScan
             GatherStats: false
             Reduce Output Operator
-              key expressions: _col3 (type: string)
+              key expressions: _col2 (type: int)
               null sort order: a
               sort order: +
-              Map-reduce partition columns: _col3 (type: string)
+              Map-reduce partition columns: _col2 (type: int)
               Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
               tag: 0
-              value expressions: _col0 (type: int)
+              value expressions: _col0 (type: string)
               auto parallelism: false
           TableScan
-            alias: g
+            alias: f
             Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (value <> '') (type: boolean)
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
-                expressions: value (type: string)
+                expressions: key (type: int)
                 outputColumnNames: _col0
                 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string)
+                  key expressions: _col0 (type: int)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 1
                   auto parallelism: false
@@ -203,8 +203,8 @@ STAGE PLANS:
             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             properties:
               column.name.delimiter ,
-              columns _col0,_col3
-              columns.types int,string
+              columns _col0,_col2
+              columns.types string,int
               escape.delim \
               serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -213,8 +213,8 @@ STAGE PLANS:
               output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
               properties:
                 column.name.delimiter ,
-                columns _col0,_col3
-                columns.types int,string
+                columns _col0,_col2
+                columns.types string,int
                 escape.delim \
                 serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -266,7 +266,7 @@ STAGE PLANS:
               name: default.filter_join_breaktask
             name: default.filter_join_breaktask
       Truncated Path -> Alias:
-        /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g]
+        /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f]
 #### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
@@ -274,12 +274,12 @@ STAGE PLANS:
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col3 (type: string)
-            1 _col0 (type: string)
+            0 _col2 (type: int)
+            1 _col0 (type: int)
           outputColumnNames: _col0, _col5
           Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col5 (type: string)
+            expressions: _col5 (type: int), _col0 (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out 
b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index e43f736..e5ba529 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -1804,34 +1804,36 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_16]
-        Merge Join Operator [MERGEJOIN_27] (rows=292 width=10)
-          Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"]
-        <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_12]
-            PartitionCols:_col1
-            Merge Join Operator [MERGEJOIN_25] (rows=266 width=10)
-              Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
-            <-Select Operator [SEL_5] (rows=242 width=10)
-                Output:["_col0"]
-                Filter Operator [FIL_23] (rows=242 width=10)
-                  predicate:key is not null
-                  TableScan [TS_3] (rows=242 width=10)
-                    default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"]
-            <-Select Operator [SEL_2] (rows=242 width=10)
-                Output:["_col0","_col1"]
-                Filter Operator [FIL_22] (rows=242 width=10)
-                  predicate:(key is not null and value is not null)
-                  TableScan [TS_0] (rows=242 width=10)
-                    default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-        <-Map 4 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_13]
-            PartitionCols:_col1
-            Select Operator [SEL_8] (rows=242 width=10)
-              Output:["_col1"]
-              Filter Operator [FIL_24] (rows=242 width=10)
-                predicate:value is not null
-                TableScan [TS_6] (rows=242 width=10)
-                  default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"]
+        Select Operator [SEL_15] (rows=292 width=10)
+          Output:["_col0","_col1"]
+          Merge Join Operator [MERGEJOIN_27] (rows=292 width=10)
+            Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"]
+          <-Map 1 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_12]
+              PartitionCols:_col2
+              Merge Join Operator [MERGEJOIN_25] (rows=266 width=10)
+                Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"]
+              <-Select Operator [SEL_5] (rows=242 width=10)
+                  Output:["_col0","_col1"]
+                  Filter Operator [FIL_23] (rows=242 width=10)
+                    predicate:(key is not null and value is not null)
+                    TableScan [TS_3] (rows=242 width=10)
+                      
default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+              <-Select Operator [SEL_2] (rows=242 width=10)
+                  Output:["_col0"]
+                  Filter Operator [FIL_22] (rows=242 width=10)
+                    predicate:key is not null
+                    TableScan [TS_0] (rows=242 width=10)
+                      default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"]
+          <-Map 4 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_13]
+              PartitionCols:_col1
+              Select Operator [SEL_8] (rows=242 width=10)
+                Output:["_col1"]
+                Filter Operator [FIL_24] (rows=242 width=10)
+                  predicate:value is not null
+                  TableScan [TS_6] (rows=242 width=10)
+                    default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"]
 
 PREHOOK: query: explain
 select count(*) from (select s1.key as key, s1.value as value from tab s1 join 
tab s3 on s1.key=s3.key

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out 
b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out
index 1bff9ea..ecc2246 100644
--- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out
+++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out
@@ -44,23 +44,23 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: f
-                  Statistics: Num rows: 25 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: g
+                  Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 24 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: (value <> '') (type: boolean)
+                    Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
+                      expressions: value (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 24 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 24 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
                         tag: 0
                         auto parallelism: true
             Execution mode: llap
@@ -116,7 +116,7 @@ STAGE PLANS:
                     name: default.filter_join_breaktask
                   name: default.filter_join_breaktask
             Truncated Path -> Alias:
-              /filter_join_breaktask/ds=2008-04-08 [f]
+              /filter_join_breaktask/ds=2008-04-08 [g]
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -132,13 +132,13 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 24 Data size: 4512 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         null sort order: a
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 24 Data size: 4512 Basic stats: 
COMPLETE Column stats: NONE
                         tag: 1
-                        value expressions: _col1 (type: string)
+                        value expressions: _col0 (type: int)
                         auto parallelism: true
             Execution mode: llap
             LLAP IO: no inputs
@@ -197,23 +197,23 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: g
-                  Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: f
+                  Statistics: Num rows: 25 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (value <> '') (type: boolean)
-                    Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 24 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 24 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string)
+                        key expressions: _col0 (type: int)
                         null sort order: a
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 25 Data size: 4600 Basic stats: 
COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 24 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
                         tag: 1
                         auto parallelism: true
             Execution mode: llap
@@ -269,7 +269,7 @@ STAGE PLANS:
                     name: default.filter_join_breaktask
                   name: default.filter_join_breaktask
             Truncated Path -> Alias:
-              /filter_join_breaktask/ds=2008-04-08 [g]
+              /filter_join_breaktask/ds=2008-04-08 [f]
         Reducer 2 
             Execution mode: llap
             Needs Tagging: false
@@ -278,19 +278,19 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col3
-                Position of Big Table: 1
-                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE
+                  0 _col0 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col2
+                Position of Big Table: 0
+                Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col3 (type: string)
+                  key expressions: _col2 (type: int)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col3 (type: string)
-                  Statistics: Num rows: 26 Data size: 105 Basic stats: 
COMPLETE Column stats: NONE
+                  Map-reduce partition columns: _col2 (type: int)
+                  Statistics: Num rows: 27 Data size: 5060 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 0
-                  value expressions: _col0 (type: int)
+                  value expressions: _col0 (type: string)
                   auto parallelism: true
         Reducer 3 
             Execution mode: llap
@@ -300,21 +300,21 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col3 (type: string)
-                  1 _col0 (type: string)
+                  0 _col2 (type: int)
+                  1 _col0 (type: int)
                 outputColumnNames: _col0, _col5
                 Position of Big Table: 0
-                Statistics: Num rows: 28 Data size: 115 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col5 (type: string)
+                  expressions: _col5 (type: int), _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 28 Data size: 115 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 29 Data size: 5566 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     GlobalTableId: 0
 #### A masked pattern was here ####
                     NumFilesPerFileSink: 1
-                    Statistics: Num rows: 28 Data size: 115 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 29 Data size: 5566 Basic stats: 
COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out 
b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out
index 738424b..c94002f 100644
--- a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out
+++ b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out
@@ -257,25 +257,25 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah 
from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d 
where d.foo=1 and c.bar=2
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah 
from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d 
where d.foo=1 and c.bar=2
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: c
+            alias: b
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
-              predicate: foo is not null (type: boolean)
+              predicate: ((bar = 2) and foo is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Select Operator
                 expressions: foo (type: int)
@@ -287,10 +287,10 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
           TableScan
-            alias: b
+            alias: c
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
-              predicate: ((bar = 2) and foo is not null) (type: boolean)
+              predicate: foo is not null (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Select Operator
                 expressions: foo (type: int)
@@ -308,24 +308,23 @@ STAGE PLANS:
           keys:
             0 _col0 (type: int)
             1 _col0 (type: int)
-          outputColumnNames: _col0
+          outputColumnNames: _col2
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: _col2 (type: int)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              value expressions: _col0 (type: int)
-          TableScan
             alias: d
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
@@ -339,6 +338,11 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                   value expressions: _col1 (type: int)
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+              value expressions: _col0 (type: int)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -346,10 +350,10 @@ STAGE PLANS:
           keys:
             0 
             1 
-          outputColumnNames: _col0, _col2
+          outputColumnNames: _col1, _col2
           Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col2 (type: int)
+            expressions: _col2 (type: int), _col1 (type: int)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column 
stats: NONE
             File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out 
b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out
index 718346f..320a52e 100644
--- a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out
+++ b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out
@@ -43,22 +43,22 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: f
+                  alias: g
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: key is not null (type: boolean)
+                    predicate: (value <> '') (type: boolean)
                     Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
+                      expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                         tag: 0
                         auto parallelism: false
@@ -113,7 +113,7 @@ STAGE PLANS:
                     name: default.filter_join_breaktask
                   name: default.filter_join_breaktask
             Truncated Path -> Alias:
-              /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f]
+              /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g]
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -129,13 +129,13 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         null sort order: a
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                         tag: 1
-                        value expressions: _col1 (type: string)
+                        value expressions: _col0 (type: int)
                         auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
@@ -192,22 +192,22 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: g
+                  alias: f
                   Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (value <> '') (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string)
+                        key expressions: _col0 (type: int)
                         null sort order: a
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 25 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE
                         tag: 1
                         auto parallelism: false
@@ -262,7 +262,7 @@ STAGE PLANS:
                     name: default.filter_join_breaktask
                   name: default.filter_join_breaktask
             Truncated Path -> Alias:
-              /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g]
+              /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f]
         Reducer 2 
             Needs Tagging: true
             Reduce Operator Tree:
@@ -270,18 +270,18 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col3
+                  0 _col0 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col3 (type: string)
+                  key expressions: _col2 (type: int)
                   null sort order: a
                   sort order: +
-                  Map-reduce partition columns: _col3 (type: string)
+                  Map-reduce partition columns: _col2 (type: int)
                   Statistics: Num rows: 27 Data size: 232 Basic stats: 
COMPLETE Column stats: NONE
                   tag: 0
-                  value expressions: _col0 (type: int)
+                  value expressions: _col0 (type: string)
                   auto parallelism: false
         Reducer 3 
             Needs Tagging: true
@@ -290,12 +290,12 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col3 (type: string)
-                  1 _col0 (type: string)
+                  0 _col2 (type: int)
+                  1 _col0 (type: int)
                 outputColumnNames: _col0, _col5
                 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col5 (type: string)
+                  expressions: _col5 (type: int), _col0 (type: string)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 29 Data size: 255 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/5d5b63dc/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out 
b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out
index 54d1ce3..546ae60 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out
@@ -776,34 +776,36 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_16]
-        Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3)
-          Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"]
-        <-Map 1 [SIMPLE_EDGE]
-          SHUFFLE [RS_12]
-            PartitionCols:_col1
-            Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3)
-              Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
-            <-Select Operator [SEL_5] (rows=230/242 width=3)
-                Output:["_col0"]
-                Filter Operator [FIL_23] (rows=230/242 width=3)
-                  predicate:key is not null
-                  TableScan [TS_3] (rows=242/242 width=3)
-                    default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"]
-            <-Select Operator [SEL_2] (rows=218/242 width=179)
-                Output:["_col0","_col1"]
-                Filter Operator [FIL_22] (rows=218/242 width=179)
-                  predicate:(key is not null and value is not null)
-                  TableScan [TS_0] (rows=242/242 width=179)
-                    default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-        <-Map 4 [SIMPLE_EDGE]
-          SHUFFLE [RS_13]
-            PartitionCols:_col1
-            Select Operator [SEL_8] (rows=230/242 width=175)
-              Output:["_col1"]
-              Filter Operator [FIL_24] (rows=230/242 width=175)
-                predicate:value is not null
-                TableScan [TS_6] (rows=242/242 width=175)
-                  default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"]
+        Select Operator [SEL_15] (rows=278/1166 width=3)
+          Output:["_col0","_col1"]
+          Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3)
+            Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"]
+          <-Map 1 [SIMPLE_EDGE]
+            SHUFFLE [RS_12]
+              PartitionCols:_col2
+              Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3)
+                Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"]
+              <-Select Operator [SEL_5] (rows=218/242 width=179)
+                  Output:["_col0","_col1"]
+                  Filter Operator [FIL_23] (rows=218/242 width=179)
+                    predicate:(key is not null and value is not null)
+                    TableScan [TS_3] (rows=242/242 width=179)
+                      
default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+              <-Select Operator [SEL_2] (rows=230/242 width=3)
+                  Output:["_col0"]
+                  Filter Operator [FIL_22] (rows=230/242 width=3)
+                    predicate:key is not null
+                    TableScan [TS_0] (rows=242/242 width=3)
+                      default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"]
+          <-Map 4 [SIMPLE_EDGE]
+            SHUFFLE [RS_13]
+              PartitionCols:_col1
+              Select Operator [SEL_8] (rows=230/242 width=175)
+                Output:["_col1"]
+                Filter Operator [FIL_24] (rows=230/242 width=175)
+                  predicate:value is not null
+                  TableScan [TS_6] (rows=242/242 width=175)
+                    default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"]
 
 PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value 
from tab s1 join tab s3 on s1.key=s3.key
 UNION  ALL

[03/50] [abbrv] hive git commit: HIVE-17538 : Enhance estimation of stats to estimate even if only one column is missing stats (Vineet Garg, reviewed by Ashutosh Chauhan)

Reply via email to