[hive] branch master updated: HIVE-22464: Implement support for NULLS FIRST/LAST in TopNKeyOperator (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)

jcamacho Sun, 17 Nov 2019 20:11:21 -0800

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new fc81b89  HIVE-22464: Implement support for NULLS FIRST/LAST in 
TopNKeyOperator (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
fc81b89 is described below

commit fc81b8909b1a0e6aa15900387a98bccf38ae2247
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Sun Nov 17 20:09:10 2019 -0800

    HIVE-22464: Implement support for NULLS FIRST/LAST in TopNKeyOperator 
(Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
---
 .../hadoop/hive/ql/exec/TopNKeyOperator.java       |  43 +-
 .../hadoop/hive/ql/optimizer/TopNKeyProcessor.java |  10 +-
 .../apache/hadoop/hive/ql/plan/TopNKeyDesc.java    |  14 +
 .../apache/hadoop/hive/ql/util/NullOrdering.java   |  21 +-
 ql/src/test/queries/clientpositive/topnkey.q       |  55 ++-
 .../test/results/clientpositive/llap/topnkey.q.out | 472 ++++++++++++++++++-
 .../test/results/clientpositive/tez/topnkey.q.out  | 482 ++++++++++++++++++--
 ql/src/test/results/clientpositive/topnkey.q.out   | 499 ++++++++++++++++++++-
 .../objectinspector/ObjectInspectorUtils.java      |  51 ++-
 9 files changed, 1571 insertions(+), 76 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index 4734824..d16500e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.util.NullOrdering;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 
@@ -58,21 +59,26 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> 
implements Serializab
   }
 
   public static class KeyWrapperComparator implements Comparator<KeyWrapper> {
-    private ObjectInspector[] objectInspectors1;
-    private ObjectInspector[] objectInspectors2;
-    private boolean[] columnSortOrderIsDesc;
-
-    public KeyWrapperComparator(ObjectInspector[] objectInspectors1, 
ObjectInspector[]
-        objectInspectors2, boolean[] columnSortOrderIsDesc) {
-      this.objectInspectors1 = objectInspectors1;
-      this.objectInspectors2 = objectInspectors2;
-      this.columnSortOrderIsDesc = columnSortOrderIsDesc;
+
+    private final ObjectInspector[] keyObjectInspectors;
+    private final boolean[] columnSortOrderIsDesc;
+    private final ObjectInspectorUtils.NullValueOption[] nullSortOrder;
+
+    KeyWrapperComparator(ObjectInspector[] keyObjectInspectors, String 
columnSortOrder, String nullSortOrder) {
+      this.keyObjectInspectors = keyObjectInspectors;
+      this.columnSortOrderIsDesc = new boolean[columnSortOrder.length()];
+      this.nullSortOrder = new 
ObjectInspectorUtils.NullValueOption[nullSortOrder.length()];
+      for (int i = 0; i < columnSortOrder.length(); ++i) {
+        this.columnSortOrderIsDesc[i] = columnSortOrder.charAt(i) == '-';
+        this.nullSortOrder[i] = 
NullOrdering.fromSign(nullSortOrder.charAt(i)).getNullValueOption();
+      }
     }
 
     @Override
     public int compare(KeyWrapper key1, KeyWrapper key2) {
-      return ObjectInspectorUtils.compare(key1.getKeyArray(), 
objectInspectors1,
-          key2.getKeyArray(), objectInspectors2, columnSortOrderIsDesc);
+      return ObjectInspectorUtils.compare(
+              key1.getKeyArray(), keyObjectInspectors, key2.getKeyArray(), 
keyObjectInspectors,
+              columnSortOrderIsDesc, nullSortOrder);
     }
   }
 
@@ -82,12 +88,6 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> 
implements Serializab
 
     this.topN = conf.getTopN();
 
-    String columnSortOrder = conf.getColumnSortOrder();
-    boolean[] columnSortOrderIsDesc = new boolean[columnSortOrder.length()];
-    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
-      columnSortOrderIsDesc[i] = (columnSortOrder.charAt(i) == '-');
-    }
-
     ObjectInspector rowInspector = inputObjInspectors[0];
     ObjectInspector standardObjInspector = 
ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
     outputObjInspector = rowInspector;
@@ -107,8 +107,13 @@ public class TopNKeyOperator extends Operator<TopNKeyDesc> 
implements Serializab
       standardKeyObjectInspectors[i] = 
standardKeyFields[i].initialize(standardObjInspector);
     }
 
-    priorityQueue = new PriorityQueue<>(topN + 1, new 
TopNKeyOperator.KeyWrapperComparator(
-        standardKeyObjectInspectors, standardKeyObjectInspectors, 
columnSortOrderIsDesc));
+    String columnSortOrder = conf.getColumnSortOrder();
+    String nullSortOrder = conf.getNullOrder();
+
+    // We need a reversed comparator because the PriorityQueue.poll() method 
is used for filtering out keys.
+    // Ex.: When ORDER BY key1 ASC then call of poll() should remove the 
largest key.
+    priorityQueue = new PriorityQueue<>(topN + 1,
+            new KeyWrapperComparator(standardKeyObjectInspectors, 
columnSortOrder, nullSortOrder).reversed());
 
     KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, 
keyObjectInspectors,
         standardKeyObjectInspectors);
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
index 721a9b9..4b4cf99 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java
@@ -53,7 +53,7 @@ public class TopNKeyProcessor implements NodeProcessor {
 
   @Override
   public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-      Object... nodeOutputs) throws SemanticException {
+                        Object... nodeOutputs) throws SemanticException {
 
     // Get ReduceSinkOperator
     ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) nd;
@@ -95,11 +95,11 @@ public class TopNKeyProcessor implements NodeProcessor {
     }
 
     // Insert a new top n key operator between the group by operator and its 
parent
-    TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), 
reduceSinkDesc.getOrder(),
-        groupByKeyColumns);
+    TopNKeyDesc topNKeyDesc = new TopNKeyDesc(
+            reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), 
reduceSinkDesc.getNullOrder(), groupByKeyColumns);
     Operator<? extends OperatorDesc> newOperator = 
OperatorFactory.getAndMakeChild(
-        groupByOperator.getCompilationOpContext(), (OperatorDesc) topNKeyDesc,
-        new RowSchema(groupByOperator.getSchema()), 
groupByOperator.getParentOperators());
+            groupByOperator.getCompilationOpContext(), (OperatorDesc) 
topNKeyDesc,
+            new RowSchema(groupByOperator.getSchema()), 
groupByOperator.getParentOperators());
     newOperator.getChildOperators().add(groupByOperator);
     groupByOperator.getParentOperators().add(newOperator);
     parentOperator.removeChild(groupByOperator);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
index c62c4a9..84e6149 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
@@ -33,6 +33,7 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
 
   private int topN;
   private String columnSortOrder;
+  private String nullOrder;
   private List<ExprNodeDesc> keyColumns;
 
   public TopNKeyDesc() {
@@ -41,10 +42,12 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
   public TopNKeyDesc(
       final int topN,
       final String columnSortOrder,
+      final String nullOrder,
       final List<ExprNodeDesc> keyColumns) {
 
     this.topN = topN;
     this.columnSortOrder = columnSortOrder;
+    this.nullOrder = nullOrder;
     this.keyColumns = keyColumns;
   }
 
@@ -66,6 +69,15 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
     this.columnSortOrder = columnSortOrder;
   }
 
+  @Explain(displayName = "null sort order", explainLevels = { Level.EXTENDED })
+  public String getNullOrder() {
+    return nullOrder;
+  }
+
+  public void setNullOrder(String nullOrder) {
+    this.nullOrder = nullOrder;
+  }
+
   @Explain(displayName = "keys")
   public String getKeyString() {
     return PlanUtils.getExprListString(keyColumns);
@@ -98,6 +110,7 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
       TopNKeyDesc otherDesc = (TopNKeyDesc) other;
       return getTopN() == otherDesc.getTopN() &&
           Objects.equals(columnSortOrder, otherDesc.columnSortOrder) &&
+          Objects.equals(nullOrder, otherDesc.nullOrder) &&
           ExprNodeDescUtils.isSame(keyColumns, otherDesc.keyColumns);
     }
     return false;
@@ -108,6 +121,7 @@ public class TopNKeyDesc extends AbstractOperatorDesc {
     TopNKeyDesc ret = new TopNKeyDesc();
     ret.setTopN(topN);
     ret.setColumnSortOrder(columnSortOrder);
+    ret.setNullOrder(nullOrder);
     ret.setKeyColumns(getKeyColumns() == null ? null : new 
ArrayList<>(getKeyColumns()));
     return ret;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java 
b/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java
index 6bf1db2..46ff329 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java
@@ -24,18 +24,20 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullVa
  * Enum for converting different Null ordering description types.
  */
 public enum NullOrdering {
-  NULLS_FIRST(1, HiveParser.TOK_NULLS_FIRST, NullValueOption.MAXVALUE),
-  NULLS_LAST(0, HiveParser.TOK_NULLS_LAST, NullValueOption.MINVALUE);
+  NULLS_FIRST(1, HiveParser.TOK_NULLS_FIRST, NullValueOption.MAXVALUE, 'a'),
+  NULLS_LAST(0, HiveParser.TOK_NULLS_LAST, NullValueOption.MINVALUE, 'z');
 
-  NullOrdering(int code, int token, NullValueOption nullValueOption) {
+  NullOrdering(int code, int token, NullValueOption nullValueOption, char 
sign) {
     this.code = code;
     this.token = token;
     this.nullValueOption = nullValueOption;
+    this.sign = sign;
   }
 
   private final int code;
   private final int token;
   private final NullValueOption nullValueOption;
+  private final char sign;
 
   public static NullOrdering fromToken(int token) {
     for (NullOrdering nullOrdering : NullOrdering.values()) {
@@ -55,6 +57,15 @@ public enum NullOrdering {
     throw new EnumConstantNotPresentException(NullOrdering.class, "No enum 
constant present with code " + code);
   }
 
+  public static NullOrdering fromSign(char sign) {
+    for (NullOrdering nullOrdering : NullOrdering.values()) {
+      if (nullOrdering.sign == sign) {
+        return nullOrdering;
+      }
+    }
+    throw new EnumConstantNotPresentException(NullOrdering.class, "No enum 
constant present with sign " + sign);
+  }
+
   public int getCode() {
     return code;
   }
@@ -66,4 +77,8 @@ public enum NullOrdering {
   public NullValueOption getNullValueOption() {
     return nullValueOption;
   }
+
+  public char getSign() {
+    return sign;
+  }
 }
diff --git a/ql/src/test/queries/clientpositive/topnkey.q 
b/ql/src/test/queries/clientpositive/topnkey.q
index e02a41d..283f426 100644
--- a/ql/src/test/queries/clientpositive/topnkey.q
+++ b/ql/src/test/queries/clientpositive/topnkey.q
@@ -14,7 +14,7 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.stats.fetch.column.stats=true;
 set hive.cbo.enable=true;
 
-EXPLAIN
+EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5;
 
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5;
@@ -29,3 +29,56 @@ SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON 
(src1.key = src2.key)
 
 SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = 
src2.key) ORDER BY src1.key LIMIT 5;
 
+CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+);
+
+INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL);
+
+EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3;
+
+
+EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS FIRST LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS LAST LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS FIRST LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS LAST LIMIT 2;
+
+DROP TABLE IF EXISTS t_test;
+
+CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+);
+
+INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL);
+
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS FIRST LIMIT 2;
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC NULLS LAST LIMIT 2;
+
+DROP TABLE IF EXISTS t_test;
diff --git a/ql/src/test/results/clientpositive/llap/topnkey.q.out 
b/ql/src/test/results/clientpositive/llap/topnkey.q.out
index e3c0bc1..cd47e9d 100644
--- a/ql/src/test/results/clientpositive/llap/topnkey.q.out
+++ b/ql/src/test/results/clientpositive/llap/topnkey.q.out
@@ -1,13 +1,18 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) 
AS `$f1`
+FROM `default`.`src`
+GROUP BY `key`
+ORDER BY `key`
+LIMIT 5
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -26,6 +31,7 @@ STAGE PLANS:
                 TableScan
                   alias: src
                   Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  GatherStats: false
                   Select Operator
                     expressions: key (type: string), 
UDFToInteger(substr(value, 5)) (type: int)
                     outputColumnNames: _col0, _col1
@@ -33,6 +39,7 @@ STAGE PLANS:
                     Top N Key Operator
                       sort order: +
                       keys: _col0 (type: string)
+                      null sort order: z
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       top n: 5
                       Group By Operator
@@ -44,15 +51,73 @@ STAGE PLANS:
                         Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: string)
+                          null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          TopN: 5
                           TopN Hash Memory Usage: 0.1
                           value expressions: _col1 (type: bigint)
+                          auto parallelism: true
             Execution mode: llap
             LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: src
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.src
+                    numFiles 1
+                    numRows 500
+                    rawDataSize 5312
+                    serialization.ddl struct src { string key, string value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.src
+                      numFiles 1
+                      numRows 500
+                      rawDataSize 5312
+                      serialization.ddl struct src { string key, string value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.src
+                  name: default.src
+            Truncated Path -> Alias:
+              /src [src]
         Reducer 2 
             Execution mode: llap
+            Needs Tagging: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -62,12 +127,17 @@ STAGE PLANS:
                 Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
+                  null sort order: z
                   sort order: +
                   Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  tag: -1
+                  TopN: 5
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col1 (type: bigint)
+                  auto parallelism: false
         Reducer 3 
             Execution mode: llap
+            Needs Tagging: false
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: bigint)
@@ -78,11 +148,26 @@ STAGE PLANS:
                   Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator
                     compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
                     Statistics: Num rows: 5 Data size: 475 Basic stats: 
COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types string:bigint
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels 
true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -330,3 +415,386 @@ POSTHOOK: Input: default@src
 0      val_0
 0      val_0
 0      val_0
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t_test
+                  Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 12 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: ++
+                      keys: a (type: int), b (type: int)
+                      Statistics: Num rows: 12 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      top n: 3
+                      Group By Operator
+                        keys: a (type: int), b (type: int)
+                        minReductionHashAggr: 0.3333333
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int), _col1 (type: int)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: int), 
_col1 (type: int)
+                          Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Limit
+                  Number of rows: 3
+                  Statistics: Num rows: 3 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      1
+5      2
+6      2
+PREHOOK: query: EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t_test
+                  Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 12 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: +
+                      keys: a (type: int)
+                      Statistics: Num rows: 12 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      top n: 2
+                      Group By Operator
+                        aggregations: count(b)
+                        keys: a (type: int)
+                        minReductionHashAggr: 0.6666666
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 4 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 4 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+                          value expressions: _col1 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Limit
+                  Number of rows: 2
+                  Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 20 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 2
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL   0
+5      4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 
2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS 
FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL   0
+5      4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS 
LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7      3
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL   0
+7      3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7      3
+6      1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/tez/topnkey.q.out 
b/ql/src/test/results/clientpositive/tez/topnkey.q.out
index 3267f79..206c0c8 100644
--- a/ql/src/test/results/clientpositive/tez/topnkey.q.out
+++ b/ql/src/test/results/clientpositive/tez/topnkey.q.out
@@ -1,44 +1,177 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-Plan optimized by CBO.
+OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) 
AS `$f1`
+FROM `default`.`src`
+GROUP BY `key`
+ORDER BY `key`
+LIMIT 5
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Select Operator
+                    expressions: key (type: string), 
UDFToInteger(substr(value, 5)) (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: +
+                      keys: _col0 (type: string)
+                      null sort order: z
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      top n: 5
+                      Group By Operator
+                        aggregations: sum(_col1)
+                        keys: _col0 (type: string)
+                        minReductionHashAggr: 0.5
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          null sort order: z
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          TopN: 5
+                          TopN Hash Memory Usage: 0.1
+                          value expressions: _col1 (type: bigint)
+                          auto parallelism: true
+            Path -> Alias:
+              hdfs://### HDFS PATH ### [src]
+            Path -> Partition:
+              hdfs://### HDFS PATH ### 
+                Partition
+                  base file name: src
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    location hdfs://### HDFS PATH ###
+                    name default.src
+                    numFiles 1
+                    numRows 500
+                    rawDataSize 5312
+                    serialization.ddl struct src { string key, string value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      location hdfs://### HDFS PATH ###
+                      name default.src
+                      numFiles 1
+                      numRows 500
+                      rawDataSize 5312
+                      serialization.ddl struct src { string key, string value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.src
+                  name: default.src
+            Truncated Path -> Alias:
+              /src [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: z
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  tag: -1
+                  TopN: 5
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: bigint)
+                  auto parallelism: false
+        Reducer 3 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    directory: hdfs://### HDFS PATH ###
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 5 Data size: 475 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types string:bigint
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels 
true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
-Stage-0
-  Fetch Operator
-    limit:5
-    Stage-1
-      Reducer 3
-      File Output Operator [FS_10]
-        Limit [LIM_9] (rows=5 width=95)
-          Number of rows:5
-          Select Operator [SEL_8] (rows=250 width=95)
-            Output:["_col0","_col1"]
-          <-Reducer 2 [SIMPLE_EDGE]
-            SHUFFLE [RS_7]
-              Group By Operator [GBY_5] (rows=250 width=95)
-                
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
-              <-Map 1 [SIMPLE_EDGE]
-                SHUFFLE [RS_4]
-                  PartitionCols:_col0
-                  Group By Operator [GBY_3] (rows=250 width=95)
-                    
Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
-                    Top N Key Operator [TNK_11] (rows=500 width=178)
-                      keys:_col0,sort order:+,top n:5
-                      Select Operator [SEL_1] (rows=500 width=178)
-                        Output:["_col0","_col1"]
-                        TableScan [TS_0] (rows=500 width=178)
-                          
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
 
 PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP 
BY key ORDER BY key LIMIT 5
 PREHOOK: type: QUERY
@@ -228,3 +361,290 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 0      val_0
 0      val_0
 0      val_0
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:3
+    Stage-1
+      Reducer 3
+      File Output Operator [FS_9]
+        Limit [LIM_8] (rows=3 width=5)
+          Number of rows:3
+          Select Operator [SEL_7] (rows=6 width=4)
+            Output:["_col0","_col1"]
+          <-Reducer 2 [SIMPLE_EDGE]
+            SHUFFLE [RS_6]
+              Group By Operator [GBY_4] (rows=6 width=4)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Map 1 [SIMPLE_EDGE]
+                SHUFFLE [RS_3]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_2] (rows=6 width=4)
+                    Output:["_col0","_col1"],keys:a, b
+                    Top N Key Operator [TNK_10] (rows=12 width=6)
+                      keys:a, b,sort order:++,top n:3
+                      Select Operator [SEL_1] (rows=12 width=6)
+                        Output:["a","b"]
+                        TableScan [TS_0] (rows=12 width=6)
+                          
default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+
+PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5      1
+5      2
+6      2
+PREHOOK: query: EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:2
+    Stage-1
+      Reducer 3
+      File Output Operator [FS_9]
+        Limit [LIM_8] (rows=2 width=10)
+          Number of rows:2
+          Select Operator [SEL_7] (rows=4 width=10)
+            Output:["_col0","_col1"]
+          <-Reducer 2 [SIMPLE_EDGE]
+            SHUFFLE [RS_6]
+              Group By Operator [GBY_4] (rows=4 width=10)
+                
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
+              <-Map 1 [SIMPLE_EDGE]
+                SHUFFLE [RS_3]
+                  PartitionCols:_col0
+                  Group By Operator [GBY_2] (rows=4 width=10)
+                    Output:["_col0","_col1"],aggregations:["count(b)"],keys:a
+                    Top N Key Operator [TNK_10] (rows=12 width=6)
+                      keys:a,sort order:+,top n:2
+                      Select Operator [SEL_1] (rows=12 width=6)
+                        Output:["a","b"]
+                        TableScan [TS_0] (rows=12 width=6)
+                          
default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
+
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL   0
+5      4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 
2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS 
FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL   0
+5      4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS 
LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+5      4
+6      1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+7      3
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+NULL   0
+7      3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+7      3
+6      1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/topnkey.q.out 
b/ql/src/test/results/clientpositive/topnkey.q.out
index 7ea1a7a..66d0eca 100644
--- a/ql/src/test/results/clientpositive/topnkey.q.out
+++ b/ql/src/test/results/clientpositive/topnkey.q.out
@@ -1,13 +1,18 @@
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN EXTENDED
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) 
AS `$f1`
+FROM `default`.`src`
+GROUP BY `key`
+ORDER BY `key`
+LIMIT 5
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -20,6 +25,7 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE 
Column stats: COMPLETE
+            GatherStats: false
             Select Operator
               expressions: key (type: string), UDFToInteger(substr(value, 5)) 
(type: int)
               outputColumnNames: _col0, _col1
@@ -33,11 +39,69 @@ STAGE PLANS:
                 Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
+                  null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  tag: -1
+                  TopN: 5
                   TopN Hash Memory Usage: 0.1
                   value expressions: _col1 (type: bigint)
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+              bucket_count -1
+              bucketing_version 2
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                bucket_count -1
+                bucketing_version 2
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [$hdt$_0:src]
+      Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator
           aggregations: sum(VALUE._col0)
@@ -47,21 +111,66 @@ STAGE PLANS:
           Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE 
Column stats: COMPLETE
           File Output Operator
             compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  column.name.delimiter ,
+                  columns _col0,_col1
+                  columns.types string,bigint
+                  escape.delim \
+                  serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
+            GatherStats: false
             Reduce Output Operator
               key expressions: _col0 (type: string)
+              null sort order: z
               sort order: +
               Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE 
Column stats: COMPLETE
+              tag: -1
+              TopN: 5
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: bigint)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10004
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types string,bigint
+              escape.delim \
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types string,bigint
+                escape.delim \
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
bigint)
@@ -72,11 +181,26 @@ STAGE PLANS:
             Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
               Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE 
Column stats: COMPLETE
+#### A masked pattern was here ####
               table:
                   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1
+                    columns.types string:bigint
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
 
   Stage: Stage-0
     Fetch Operator
@@ -313,3 +437,374 @@ POSTHOOK: Input: default@src
 0      val_0
 0      val_0
 0      val_0
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t_test
+            Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              expressions: a (type: int), b (type: int)
+              outputColumnNames: a, b
+              Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                keys: a (type: int), b (type: int)
+                minReductionHashAggr: 0.99
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
+                  Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column 
stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: int)
+              sort order: ++
+              Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE 
Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 
(type: int)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Limit
+            Number of rows: 3
+            Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column 
stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      1
+5      2
+6      2
+PREHOOK: query: EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t_test
+            Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              expressions: a (type: int), b (type: int)
+              outputColumnNames: a, b
+              Statistics: Num rows: 12 Data size: 72 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(b)
+                keys: a (type: int)
+                minReductionHashAggr: 0.99
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column 
stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: 
bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Limit
+            Number of rows: 2
+            Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column 
stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 2
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL   0
+5      4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a NULLS 
LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC LIMIT 
2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS 
FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL   0
+5      4
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC NULLS 
LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a ASC 
NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5      4
+6      1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
+PREHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test(
+  a int,
+  b int,
+  c int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(7, 8, 4), (7, 8, 4), (7, 8, 4),
+(NULL, NULL, NULL),
+(5, 2, 3),
+(NULL, NULL, NULL),
+(NULL, NULL, NULL),
+(6, 2, 1),
+(5, 1, 2), (5, 1, 2), (5, 1, 2),
+(NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.a SCRIPT []
+POSTHOOK: Lineage: t_test.b SCRIPT []
+POSTHOOK: Lineage: t_test.c SCRIPT []
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7      3
+6      1
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS FIRST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS FIRST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+NULL   0
+7      3
+PREHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS LAST LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, count(b) FROM t_test GROUP BY a ORDER BY a DESC 
NULLS LAST LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+7      3
+6      1
+PREHOOK: query: DROP TABLE IF EXISTS t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE IF EXISTS t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index df6e788..ab1cfdc 100644
--- 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -18,12 +18,16 @@
 
 package org.apache.hadoop.hive.serde2.objectinspector;
 
+import static java.util.Comparator.nullsFirst;
+import static java.util.Comparator.nullsLast;
+
 import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
 import java.lang.reflect.Type;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -944,20 +948,41 @@ public final class ObjectInspectorUtils {
     return 0;
   }
 
-  public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2,
-                            ObjectInspector[] oi2, boolean[] 
columnSortOrderIsDesc) {
-    assert (o1.length == oi1.length);
-    assert (o2.length == oi2.length);
-    assert (o1.length == o2.length);
+  public static int compare(
+          Object[] objectArray1, ObjectInspector[] oi1, Object[] objectArray2, 
ObjectInspector[] oi2,
+          boolean[] columnSortOrderIsDesc, NullValueOption[] nullSortOrder) {
+    assert (objectArray1.length == objectArray2.length);
+    assert (objectArray1.length == oi1.length);
+    assert (objectArray2.length == oi2.length);
+    assert (columnSortOrderIsDesc.length == objectArray1.length);
+    assert (nullSortOrder.length == objectArray1.length);
 
-    for (int i = 0; i < o1.length; i++) {
-      int r = compare(o1[i], oi1[i], o2[i], oi2[i]);
-      if (r != 0) {
-        if (columnSortOrderIsDesc[i]) {
-          return r;
-        } else {
-          return -r;
-        }
+    List<Comparator<Object>> comparators = new ArrayList<>(oi1.length);
+    for (int i = 0; i < oi1.length; i++) {
+      final int keyIndex = i;
+
+      Comparator<Object> comparator = (o1, o2) -> compare(
+              o1, oi1[keyIndex], o2, oi2[keyIndex]);
+
+      if (columnSortOrderIsDesc[i]) {
+        comparator = comparator.reversed();
+      }
+
+      if (nullSortOrder[i] == NullValueOption.MAXVALUE) {
+        comparators.add(nullsFirst(comparator));
+      } else {
+        comparators.add(nullsLast(comparator));
+      }
+    }
+
+    return compare(comparators, objectArray1, objectArray2);
+  }
+
+  public static <T> int compare(List<Comparator<T>> comparatorList, T[] o1, 
T[] o2) {
+    for (int i = 0; i < comparatorList.size(); ++i) {
+      int c = comparatorList.get(i).compare(o1[i], o2[i]);
+      if (c != 0) {
+        return c;
       }
     }
     return 0;

[hive] branch master updated: HIVE-22464: Implement support for NULLS FIRST/LAST in TopNKeyOperator (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)

Reply via email to