[15/54] [abbrv] hive git commit: HIVE-17896: TopNKey: Create a standalone vectorizable TopNKey operator (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

sershe Thu, 19 Jul 2018 14:45:03 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out 
b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
new file mode 100644
index 0000000..16803c9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -0,0 +1,592 @@
+PREHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY 
key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string), 
UDFToInteger(substr(value, 5)) (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 4]
+                        selectExpressions: CastStringToLong(col 
3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 
4:int
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: +
+                      keys: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      top n: 5
+                      Top N Key Vectorization:
+                          className: VectorTopNKeyOperator
+                          keyExpressions: col 0:string
+                          native: true
+                      Group By Operator
+                        aggregations: sum(_col1)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFSumLong(col 4:int) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            keyExpressions: col 0:string
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0]
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkStringOperator
+                              keyColumnNums: [0]
+                              native: true
+                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [1]
+                          Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+                          value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string, bigint]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:string, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0]
+                      native: true
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: [1]
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 5 Data size: 475 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP 
BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP 
BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      0
+10     10
+100    200
+103    206
+104    208
+PREHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: +
+                      keys: key (type: string)
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      top n: 5
+                      Top N Key Vectorization:
+                          className: VectorTopNKeyOperator
+                          keyExpressions: col 0:string
+                          native: true
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            keyExpressions: col 0:string
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: []
+                        keys: key (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 250 Data size: 21750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkStringOperator
+                              keyColumnNums: [0]
+                              native: true
+                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: []
+                          Statistics: Num rows: 250 Data size: 21750 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0]
+                      native: true
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: []
+                  Statistics: Num rows: 250 Data size: 21750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0]
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 5 Data size: 435 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0
+10
+100
+103
+104
+PREHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = 
src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = 
src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 
0:string)
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 500 Data size: 43500 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 
0:string)
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [1]
+                        Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col1 (type: string)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: string)
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 5 Data size: 890 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON 
(src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON 
(src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      val_0
+0      val_0
+0      val_0
+0      val_0
+0      val_0


http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
index e79cdf7..f7c00f8 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -491,31 +491,40 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [0]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:tinyint
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
+                    Top N Key Operator
+                      sort order: +
                       keys: ctinyint (type: tinyint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 131 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
-                        Statistics: Num rows: 131 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.3
+                      Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      top n: 20
+                      Top N Key Vectorization:
+                          className: VectorTopNKeyOperator
+                          keyExpressions: col 0:tinyint
+                          native: true
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            keyExpressions: col 0:tinyint
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: []
+                        keys: ctinyint (type: tinyint)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 131 Data size: 264 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: tinyint)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: tinyint)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              keyColumnNums: [0]
+                              native: true
+                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              partitionColumnNums: [0]
+                              valueColumnNums: []
+                          Statistics: Num rows: 131 Data size: 264 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.3
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -560,19 +569,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 20 Data size: 64 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
index 1b6adee..a8f097f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
@@ -140,188 +140,190 @@ Stage-0
     limit:100
     Stage-1
       Reducer 6 vectorized
-      File Output Operator [FS_224]
-        Limit [LIM_223] (rows=100 width=88)
+      File Output Operator [FS_225]
+        Limit [LIM_224] (rows=100 width=88)
           Number of rows:100
-          Select Operator [SEL_222] (rows=1045432122 width=88)
+          Select Operator [SEL_223] (rows=1045432122 width=88)
             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
           <-Reducer 5 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_221]
-              Select Operator [SEL_220] (rows=1045432122 width=88)
+            SHUFFLE [RS_222]
+              Select Operator [SEL_221] (rows=1045432122 width=88)
                 
Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"]
-                Group By Operator [GBY_219] (rows=1045432122 width=88)
+                Group By Operator [GBY_220] (rows=1045432122 width=88)
                   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7
                 <-Reducer 4 [SIMPLE_EDGE]
                   SHUFFLE [RS_63]
                     PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
                     Group By Operator [GBY_62] (rows=2090864244 width=88)
                       
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6,
 _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                      Select Operator [SEL_61] (rows=2090864244 width=88)
-                        
Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                        Filter Operator [FIL_60] (rows=2090864244 width=88)
-                          predicate:(_col15 is not null or _col17 is not null)
-                          Merge Join Operator [MERGEJOIN_172] (rows=2090864244 
width=88)
-                            Conds:RS_55._col0=RS_56._col0(Left 
Semi),RS_55._col0=RS_210._col0(Left Outer),RS_55._col0=RS_218._col0(Left 
Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
-                          <-Reducer 3 [SIMPLE_EDGE]
-                            PARTITION_ONLY_SHUFFLE [RS_55]
-                              PartitionCols:_col0
-                              Merge Join Operator [MERGEJOIN_168] 
(rows=96800003 width=860)
-                                
Conds:RS_50._col1=RS_181._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                              <-Map 9 [SIMPLE_EDGE] vectorized
-                                SHUFFLE [RS_181]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_180] (rows=1861800 
width=385)
-                                    
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-                                    Filter Operator [FIL_179] (rows=1861800 
width=385)
-                                      predicate:cd_demo_sk is not null
-                                      TableScan [TS_6] (rows=1861800 width=385)
-                                        
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
-                              <-Reducer 2 [SIMPLE_EDGE]
-                                SHUFFLE [RS_50]
-                                  PartitionCols:_col1
-                                  Merge Join Operator [MERGEJOIN_167] 
(rows=88000001 width=860)
-                                    
Conds:RS_175._col2=RS_178._col0(Inner),Output:["_col0","_col1"]
-                                  <-Map 1 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_175]
-                                      PartitionCols:_col2
-                                      Select Operator [SEL_174] (rows=80000000 
width=860)
-                                        Output:["_col0","_col1","_col2"]
-                                        Filter Operator [FIL_173] 
(rows=80000000 width=860)
-                                          predicate:(c_current_addr_sk is not 
null and c_current_cdemo_sk is not null and c_customer_sk is not null)
-                                          TableScan [TS_0] (rows=80000000 
width=860)
-                                            
default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
-                                  <-Map 8 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_178]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_177] (rows=20000000 
width=1014)
-                                        Output:["_col0"]
-                                        Filter Operator [FIL_176] 
(rows=20000000 width=1014)
-                                          predicate:((ca_county) IN ('Walker 
County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana 
County') and ca_address_sk is not null)
-                                          TableScan [TS_3] (rows=40000000 
width=1014)
-                                            
default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
-                          <-Reducer 11 [SIMPLE_EDGE]
-                            SHUFFLE [RS_56]
-                              PartitionCols:_col0
-                              Group By Operator [GBY_54] (rows=633595212 
width=88)
-                                Output:["_col0"],keys:_col0
-                                Select Operator [SEL_18] (rows=633595212 
width=88)
-                                  Output:["_col0"]
-                                  Merge Join Operator [MERGEJOIN_169] 
(rows=633595212 width=88)
-                                    
Conds:RS_202._col0=RS_184._col0(Inner),Output:["_col1"]
-                                  <-Map 12 [SIMPLE_EDGE] vectorized
-                                    PARTITION_ONLY_SHUFFLE [RS_184]
+                      Top N Key Operator [TNK_103] (rows=2090864244 width=88)
+                        keys:_col6, _col7, _col8, _col9, _col10, _col11, 
_col12, _col13,sort order:++++++++,top n:100
+                        Select Operator [SEL_61] (rows=2090864244 width=88)
+                          
Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+                          Filter Operator [FIL_60] (rows=2090864244 width=88)
+                            predicate:(_col15 is not null or _col17 is not 
null)
+                            Merge Join Operator [MERGEJOIN_173] 
(rows=2090864244 width=88)
+                              Conds:RS_55._col0=RS_56._col0(Left 
Semi),RS_55._col0=RS_211._col0(Left Outer),RS_55._col0=RS_219._col0(Left 
Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
+                            <-Reducer 3 [SIMPLE_EDGE]
+                              PARTITION_ONLY_SHUFFLE [RS_55]
+                                PartitionCols:_col0
+                                Merge Join Operator [MERGEJOIN_169] 
(rows=96800003 width=860)
+                                  
Conds:RS_50._col1=RS_182._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+                                <-Map 9 [SIMPLE_EDGE] vectorized
+                                  SHUFFLE [RS_182]
+                                    PartitionCols:_col0
+                                    Select Operator [SEL_181] (rows=1861800 
width=385)
+                                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+                                      Filter Operator [FIL_180] (rows=1861800 
width=385)
+                                        predicate:cd_demo_sk is not null
+                                        TableScan [TS_6] (rows=1861800 
width=385)
+                                          
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
+                                <-Reducer 2 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_50]
+                                    PartitionCols:_col1
+                                    Merge Join Operator [MERGEJOIN_168] 
(rows=88000001 width=860)
+                                      
Conds:RS_176._col2=RS_179._col0(Inner),Output:["_col0","_col1"]
+                                    <-Map 1 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_176]
+                                        PartitionCols:_col2
+                                        Select Operator [SEL_175] 
(rows=80000000 width=860)
+                                          Output:["_col0","_col1","_col2"]
+                                          Filter Operator [FIL_174] 
(rows=80000000 width=860)
+                                            predicate:(c_current_addr_sk is 
not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
+                                            TableScan [TS_0] (rows=80000000 
width=860)
+                                              
default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+                                    <-Map 8 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_179]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_178] 
(rows=20000000 width=1014)
+                                          Output:["_col0"]
+                                          Filter Operator [FIL_177] 
(rows=20000000 width=1014)
+                                            predicate:((ca_county) IN ('Walker 
County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana 
County') and ca_address_sk is not null)
+                                            TableScan [TS_3] (rows=40000000 
width=1014)
+                                              
default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
+                            <-Reducer 11 [SIMPLE_EDGE]
+                              SHUFFLE [RS_56]
+                                PartitionCols:_col0
+                                Group By Operator [GBY_54] (rows=633595212 
width=88)
+                                  Output:["_col0"],keys:_col0
+                                  Select Operator [SEL_18] (rows=633595212 
width=88)
+                                    Output:["_col0"]
+                                    Merge Join Operator [MERGEJOIN_170] 
(rows=633595212 width=88)
+                                      
Conds:RS_203._col0=RS_185._col0(Inner),Output:["_col1"]
+                                    <-Map 12 [SIMPLE_EDGE] vectorized
+                                      PARTITION_ONLY_SHUFFLE [RS_185]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_184] (rows=4058 
width=1119)
+                                          Output:["_col0"]
+                                          Filter Operator [FIL_183] (rows=4058 
width=1119)
+                                            predicate:((d_year = 2002) and 
d_date_sk is not null and d_moy BETWEEN 4 AND 7)
+                                            TableScan [TS_12] (rows=73049 
width=1119)
+                                              
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+                                    <-Map 10 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_203]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_202] 
(rows=575995635 width=88)
+                                          Output:["_col0","_col1"]
+                                          Filter Operator [FIL_201] 
(rows=575995635 width=88)
+                                            predicate:((ss_customer_sk BETWEEN 
DynamicValue(RS_55_c_c_customer_sk_min) AND 
DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, 
DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN 
DynamicValue(RS_16_date_dim_d_date_sk_min) AND 
DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not 
null and ss_sold_date_sk is not null)
+                                            TableScan [TS_9] (rows=575995635 
width=88)
+                                              
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+                                            <-Reducer 13 [BROADCAST_EDGE] 
vectorized
+                                              BROADCAST [RS_198]
+                                                Group By Operator [GBY_197] 
(rows=1 width=12)
+                                                  
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
+                                                <-Map 12 [CUSTOM_SIMPLE_EDGE] 
vectorized
+                                                  PARTITION_ONLY_SHUFFLE 
[RS_194]
+                                                    Group By Operator 
[GBY_191] (rows=1 width=12)
+                                                      
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
+                                                      Select Operator 
[SEL_186] (rows=4058 width=1119)
+                                                        Output:["_col0"]
+                                                         Please refer to the 
previous Select Operator [SEL_184]
+                                            <-Reducer 7 [BROADCAST_EDGE] 
vectorized
+                                              BROADCAST [RS_200]
+                                                Group By Operator [GBY_199] 
(rows=1 width=12)
+                                                  
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=96800000)"]
+                                                <-Reducer 3 
[CUSTOM_SIMPLE_EDGE]
+                                                  PARTITION_ONLY_SHUFFLE 
[RS_136]
+                                                    Group By Operator 
[GBY_135] (rows=1 width=12)
+                                                      
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=96800000)"]
+                                                      Select Operator 
[SEL_134] (rows=96800003 width=860)
+                                                        Output:["_col0"]
+                                                         Please refer to the 
previous Merge Join Operator [MERGEJOIN_169]
+                            <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized
+                              FORWARD [RS_211]
+                                PartitionCols:_col0
+                                Select Operator [SEL_210] (rows=79201469 
width=135)
+                                  Output:["_col0","_col1"]
+                                  Group By Operator [GBY_209] (rows=79201469 
width=135)
+                                    Output:["_col0"],keys:KEY._col0
+                                  <-Reducer 14 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_30]
                                       PartitionCols:_col0
-                                      Select Operator [SEL_183] (rows=4058 
width=1119)
-                                        Output:["_col0"]
-                                        Filter Operator [FIL_182] (rows=4058 
width=1119)
-                                          predicate:((d_year = 2002) and 
d_date_sk is not null and d_moy BETWEEN 4 AND 7)
-                                          TableScan [TS_12] (rows=73049 
width=1119)
-                                            
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
-                                  <-Map 10 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_202]
+                                      Group By Operator [GBY_29] 
(rows=158402938 width=135)
+                                        Output:["_col0"],keys:_col1
+                                        Merge Join Operator [MERGEJOIN_171] 
(rows=158402938 width=135)
+                                          
Conds:RS_208._col0=RS_187._col0(Inner),Output:["_col1"]
+                                        <-Map 12 [SIMPLE_EDGE] vectorized
+                                          PARTITION_ONLY_SHUFFLE [RS_187]
+                                            PartitionCols:_col0
+                                             Please refer to the previous 
Select Operator [SEL_184]
+                                        <-Map 20 [SIMPLE_EDGE] vectorized
+                                          SHUFFLE [RS_208]
+                                            PartitionCols:_col0
+                                            Select Operator [SEL_207] 
(rows=144002668 width=135)
+                                              Output:["_col0","_col1"]
+                                              Filter Operator [FIL_206] 
(rows=144002668 width=135)
+                                                predicate:((ws_sold_date_sk 
BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND 
DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk 
is not null and ws_sold_date_sk is not null)
+                                                TableScan [TS_19] 
(rows=144002668 width=135)
+                                                  
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+                                                <-Reducer 16 [BROADCAST_EDGE] 
vectorized
+                                                  BROADCAST [RS_205]
+                                                    Group By Operator 
[GBY_204] (rows=1 width=12)
+                                                      
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
+                                                    <-Map 12 
[CUSTOM_SIMPLE_EDGE] vectorized
+                                                      PARTITION_ONLY_SHUFFLE 
[RS_195]
+                                                        Group By Operator 
[GBY_192] (rows=1 width=12)
+                                                          
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
+                                                          Select Operator 
[SEL_188] (rows=4058 width=1119)
+                                                            Output:["_col0"]
+                                                             Please refer to 
the previous Select Operator [SEL_184]
+                            <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized
+                              FORWARD [RS_219]
+                                PartitionCols:_col0
+                                Select Operator [SEL_218] (rows=158394413 
width=135)
+                                  Output:["_col0","_col1"]
+                                  Group By Operator [GBY_217] (rows=158394413 
width=135)
+                                    Output:["_col0"],keys:KEY._col0
+                                  <-Reducer 17 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_44]
                                       PartitionCols:_col0
-                                      Select Operator [SEL_201] 
(rows=575995635 width=88)
-                                        Output:["_col0","_col1"]
-                                        Filter Operator [FIL_200] 
(rows=575995635 width=88)
-                                          predicate:((ss_customer_sk BETWEEN 
DynamicValue(RS_55_c_c_customer_sk_min) AND 
DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, 
DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN 
DynamicValue(RS_16_date_dim_d_date_sk_min) AND 
DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not 
null and ss_sold_date_sk is not null)
-                                          TableScan [TS_9] (rows=575995635 
width=88)
-                                            
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
-                                          <-Reducer 13 [BROADCAST_EDGE] 
vectorized
-                                            BROADCAST [RS_197]
-                                              Group By Operator [GBY_196] 
(rows=1 width=12)
-                                                
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
-                                              <-Map 12 [CUSTOM_SIMPLE_EDGE] 
vectorized
-                                                PARTITION_ONLY_SHUFFLE [RS_193]
-                                                  Group By Operator [GBY_190] 
(rows=1 width=12)
-                                                    
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
-                                                    Select Operator [SEL_185] 
(rows=4058 width=1119)
-                                                      Output:["_col0"]
-                                                       Please refer to the 
previous Select Operator [SEL_183]
-                                          <-Reducer 7 [BROADCAST_EDGE] 
vectorized
-                                            BROADCAST [RS_199]
-                                              Group By Operator [GBY_198] 
(rows=1 width=12)
-                                                
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=96800000)"]
-                                              <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
-                                                PARTITION_ONLY_SHUFFLE [RS_135]
-                                                  Group By Operator [GBY_134] 
(rows=1 width=12)
-                                                    
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=96800000)"]
-                                                    Select Operator [SEL_133] 
(rows=96800003 width=860)
-                                                      Output:["_col0"]
-                                                       Please refer to the 
previous Merge Join Operator [MERGEJOIN_168]
-                          <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized
-                            FORWARD [RS_210]
-                              PartitionCols:_col0
-                              Select Operator [SEL_209] (rows=79201469 
width=135)
-                                Output:["_col0","_col1"]
-                                Group By Operator [GBY_208] (rows=79201469 
width=135)
-                                  Output:["_col0"],keys:KEY._col0
-                                <-Reducer 14 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_30]
-                                    PartitionCols:_col0
-                                    Group By Operator [GBY_29] (rows=158402938 
width=135)
-                                      Output:["_col0"],keys:_col1
-                                      Merge Join Operator [MERGEJOIN_170] 
(rows=158402938 width=135)
-                                        
Conds:RS_207._col0=RS_186._col0(Inner),Output:["_col1"]
-                                      <-Map 12 [SIMPLE_EDGE] vectorized
-                                        PARTITION_ONLY_SHUFFLE [RS_186]
-                                          PartitionCols:_col0
-                                           Please refer to the previous Select 
Operator [SEL_183]
-                                      <-Map 20 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_207]
-                                          PartitionCols:_col0
-                                          Select Operator [SEL_206] 
(rows=144002668 width=135)
-                                            Output:["_col0","_col1"]
-                                            Filter Operator [FIL_205] 
(rows=144002668 width=135)
-                                              predicate:((ws_sold_date_sk 
BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND 
DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk 
is not null and ws_sold_date_sk is not null)
-                                              TableScan [TS_19] 
(rows=144002668 width=135)
-                                                
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
-                                              <-Reducer 16 [BROADCAST_EDGE] 
vectorized
-                                                BROADCAST [RS_204]
-                                                  Group By Operator [GBY_203] 
(rows=1 width=12)
-                                                    
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
-                                                  <-Map 12 
[CUSTOM_SIMPLE_EDGE] vectorized
-                                                    PARTITION_ONLY_SHUFFLE 
[RS_194]
-                                                      Group By Operator 
[GBY_191] (rows=1 width=12)
-                                                        
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
-                                                        Select Operator 
[SEL_187] (rows=4058 width=1119)
-                                                          Output:["_col0"]
-                                                           Please refer to the 
previous Select Operator [SEL_183]
-                          <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized
-                            FORWARD [RS_218]
-                              PartitionCols:_col0
-                              Select Operator [SEL_217] (rows=158394413 
width=135)
-                                Output:["_col0","_col1"]
-                                Group By Operator [GBY_216] (rows=158394413 
width=135)
-                                  Output:["_col0"],keys:KEY._col0
-                                <-Reducer 17 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_44]
-                                    PartitionCols:_col0
-                                    Group By Operator [GBY_43] (rows=316788826 
width=135)
-                                      Output:["_col0"],keys:_col1
-                                      Merge Join Operator [MERGEJOIN_171] 
(rows=316788826 width=135)
-                                        
Conds:RS_215._col0=RS_188._col0(Inner),Output:["_col1"]
-                                      <-Map 12 [SIMPLE_EDGE] vectorized
-                                        PARTITION_ONLY_SHUFFLE [RS_188]
-                                          PartitionCols:_col0
-                                           Please refer to the previous Select 
Operator [SEL_183]
-                                      <-Map 21 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_215]
-                                          PartitionCols:_col0
-                                          Select Operator [SEL_214] 
(rows=287989836 width=135)
-                                            Output:["_col0","_col1"]
-                                            Filter Operator [FIL_213] 
(rows=287989836 width=135)
-                                              predicate:((cs_sold_date_sk 
BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND 
DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk 
is not null and cs_sold_date_sk is not null)
-                                              TableScan [TS_33] 
(rows=287989836 width=135)
-                                                
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
-                                              <-Reducer 19 [BROADCAST_EDGE] 
vectorized
-                                                BROADCAST [RS_212]
-                                                  Group By Operator [GBY_211] 
(rows=1 width=12)
-                                                    
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
-                                                  <-Map 12 
[CUSTOM_SIMPLE_EDGE] vectorized
-                                                    PARTITION_ONLY_SHUFFLE 
[RS_195]
-                                                      Group By Operator 
[GBY_192] (rows=1 width=12)
-                                                        
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
-                                                        Select Operator 
[SEL_189] (rows=4058 width=1119)
-                                                          Output:["_col0"]
-                                                           Please refer to the 
previous Select Operator [SEL_183]
+                                      Group By Operator [GBY_43] 
(rows=316788826 width=135)
+                                        Output:["_col0"],keys:_col1
+                                        Merge Join Operator [MERGEJOIN_172] 
(rows=316788826 width=135)
+                                          
Conds:RS_216._col0=RS_189._col0(Inner),Output:["_col1"]
+                                        <-Map 12 [SIMPLE_EDGE] vectorized
+                                          PARTITION_ONLY_SHUFFLE [RS_189]
+                                            PartitionCols:_col0
+                                             Please refer to the previous 
Select Operator [SEL_184]
+                                        <-Map 21 [SIMPLE_EDGE] vectorized
+                                          SHUFFLE [RS_216]
+                                            PartitionCols:_col0
+                                            Select Operator [SEL_215] 
(rows=287989836 width=135)
+                                              Output:["_col0","_col1"]
+                                              Filter Operator [FIL_214] 
(rows=287989836 width=135)
+                                                predicate:((cs_sold_date_sk 
BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND 
DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk 
is not null and cs_sold_date_sk is not null)
+                                                TableScan [TS_33] 
(rows=287989836 width=135)
+                                                  
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+                                                <-Reducer 19 [BROADCAST_EDGE] 
vectorized
+                                                  BROADCAST [RS_213]
+                                                    Group By Operator 
[GBY_212] (rows=1 width=12)
+                                                      
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2,
 expectedEntries=1000000)"]
+                                                    <-Map 12 
[CUSTOM_SIMPLE_EDGE] vectorized
+                                                      PARTITION_ONLY_SHUFFLE 
[RS_196]
+                                                        Group By Operator 
[GBY_193] (rows=1 width=12)
+                                                          
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0,
 expectedEntries=1000000)"]
+                                                          Select Operator 
[SEL_190] (rows=4058 width=1119)
+                                                            Output:["_col0"]
+                                                             Please refer to 
the previous Select Operator [SEL_184]

[15/54] [abbrv] hive git commit: HIVE-17896: TopNKey: Create a standalone vectorizable TopNKey operator (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

Reply via email to