http://git-wip-us.apache.org/repos/asf/hive/blob/afa9ffee/ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
----------------------------------------------------------------------
diff --git
a/ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
b/ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
deleted file mode 100644
index 9c07d4e..0000000
--- a/ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
+++ /dev/null
@@ -1,849 +0,0 @@
-PREHOOK: query: drop table over10k
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table over10k
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table over10k(
- t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
- row format delimited
- fields terminated by '|'
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@over10k
-POSTHOOK: query: create table over10k(
- t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
- row format delimited
- fields terminated by '|'
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@over10k
-PREHOOK: query: load data local inpath '../../data/files/over10k' into table
over10k
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@over10k
-POSTHOOK: query: load data local inpath '../../data/files/over10k' into table
over10k
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@over10k
-PREHOOK: query: explain vectorization detail
-select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string,
p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string,
p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_name (type: string)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true,
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true,
LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez,
spark] IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize
IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [1, 2]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string,
p_brand:string, p_type:string, p_size:int, p_container:string,
p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS
false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0
(type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col1: string, _col2: string
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col1 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col1
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), rank_window_0 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: explain vectorization detail
-select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-where r < 4
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-where r < 4
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string,
p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string,
p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_name (type: string)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS
true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez,
spark] IS false, No PTF TopN IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- TopN Hash Memory Usage: 0.8
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize
IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [1, 2]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string,
p_brand:string, p_type:string, p_size:int, p_container:string,
p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS
false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0
(type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col1: string, _col2: string
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col1 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col1
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: (rank_window_0 < 4) (type: boolean)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), rank_window_0 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE
Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-where r < 4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-where r < 4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-a.p_mfgr a.r
-Manufacturer#1 1
-Manufacturer#1 1
-Manufacturer#1 3
-Manufacturer#2 1
-Manufacturer#2 2
-Manufacturer#2 3
-Manufacturer#3 1
-Manufacturer#3 2
-Manufacturer#3 3
-Manufacturer#4 1
-Manufacturer#4 2
-Manufacturer#4 3
-Manufacturer#5 1
-Manufacturer#5 2
-Manufacturer#5 3
-PREHOOK: query: select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-where r < 2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select *
-from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from
part) a
-where r < 2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-a.p_mfgr a.r
-Manufacturer#1 1
-Manufacturer#1 1
-Manufacturer#2 1
-Manufacturer#3 1
-Manufacturer#4 1
-Manufacturer#5 1
-PREHOOK: query: explain vectorization detail
-select *
-from (select t, f, rank() over(partition by t order by f) r from over10k) a
-where r < 6 and t < 5
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select *
-from (select t, f, rank() over(partition by t order by f) r from over10k) a
-where r < 6 and t < 5
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: over10k
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE
Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint,
f:float, d:double, bo:boolean, s:string, ts:timestamp,
dec:decimal(4,2)/DECIMAL_64, bin:binary]
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColLessLongScalar(col
0:tinyint, val 5)
- predicate: (t < 5) (type: boolean)
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: t (type: tinyint), f (type: float)
- sort order: ++
- Map-reduce partition columns: t (type: tinyint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS
true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez,
spark] IS false, No PTF TopN IS false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats:
COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.8
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize
IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- includeColumns: [0, 4]
- dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float,
d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64,
bin:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS
false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1
(type: float)
- outputColumnNames: _col0, _col4
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE
Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: tinyint, _col4: float
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col4 ASC NULLS FIRST
- partition by: _col0
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col4
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: (rank_window_0 < 6) (type: boolean)
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col0 (type: tinyint), _col4 (type: float),
rank_window_0 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1017544 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats:
COMPLETE Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select *
-from (select t, f, rank() over(partition by t order by f) r from over10k) a
-where r < 6 and t < 5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-#### A masked pattern was here ####
-POSTHOOK: query: select *
-from (select t, f, rank() over(partition by t order by f) r from over10k) a
-where r < 6 and t < 5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-#### A masked pattern was here ####
-a.t a.f a.r
--3 0.56 1
--3 0.83 2
--3 2.26 3
--3 2.48 4
--3 3.82 5
--2 1.55 1
--2 1.65 2
--2 1.79 3
--2 4.06 4
--2 4.4 5
--1 0.79 1
--1 0.95 2
--1 1.27 3
--1 1.49 4
--1 2.8 5
-0 0.08 1
-0 0.94 2
-0 1.44 3
-0 2.0 4
-0 2.12 5
-1 0.13 1
-1 0.44 2
-1 1.04 3
-1 3.41 4
-1 3.45 5
-2 2.21 1
-2 3.1 2
-2 9.93 3
-2 11.43 4
-2 15.45 5
-3 0.12 1
-3 0.19 2
-3 7.14 3
-3 7.97 4
-3 8.95 5
-4 2.26 1
-4 5.51 2
-4 5.53 3
-4 5.76 4
-4 7.26 5
-PREHOOK: query: select *
-from (select t, f, row_number() over(partition by t order by f) r from
over10k) a
-where r < 8 and t < 0
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-#### A masked pattern was here ####
-POSTHOOK: query: select *
-from (select t, f, row_number() over(partition by t order by f) r from
over10k) a
-where r < 8 and t < 0
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-#### A masked pattern was here ####
-a.t a.f a.r
--3 0.56 1
--3 0.83 2
--3 2.26 3
--3 2.48 4
--3 3.82 5
--3 6.8 6
--3 6.83 7
--2 1.55 1
--2 1.65 2
--2 1.79 3
--2 4.06 4
--2 4.4 5
--2 5.43 6
--2 5.59 7
--1 0.79 1
--1 0.95 2
--1 1.27 3
--1 1.49 4
--1 2.8 5
--1 4.08 6
--1 4.31 7
-PREHOOK: query: explain vectorization detail
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: ctinyint (type: tinyint), cdouble (type: double)
- sort order: ++
- Map-reduce partition columns: ctinyint (type: tinyint)
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats:
COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.8
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1
(type: double)
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE
Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: tinyint, _col5: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col5 ASC NULLS FIRST
- partition by: _col0
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col5
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats:
COMPLETE Column stats: NONE
- Filter Operator
- predicate: (rank_window_0 < 5) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: tinyint), _col5 (type: double),
rank_window_0 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4096 Data size: 880654 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 4096 Data size: 880654 Basic stats:
COMPLETE Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: drop table if exists sB
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists sB
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE as
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-PREHOOK: type: CREATETABLE_AS_SELECT
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: database:default
-PREHOOK: Output: default@sB
-POSTHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE as
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-POSTHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@sB
-POSTHOOK: Lineage: sb.cdouble SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null),
]
-POSTHOOK: Lineage: sb.ctinyint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint,
comment:null), ]
-POSTHOOK: Lineage: sb.r SCRIPT
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint,
type:smallint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cint,
type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint,
type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat,
type:float, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble,
type:double, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1,
type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2,
type:string, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2,
type:timestamp, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2,
type:boolean, comment:null
), ]
-a.ctinyint a.cdouble a.r
-PREHOOK: query: select * from sB
-where ctinyint is null
-PREHOOK: type: QUERY
-PREHOOK: Input: default@sb
-#### A masked pattern was here ####
-POSTHOOK: query: select * from sB
-where ctinyint is null
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@sb
-#### A masked pattern was here ####
-sb.ctinyint sb.cdouble sb.r
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-PREHOOK: query: drop table if exists sD
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists sD
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: explain vectorization detail
-create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
TEXTFILE as
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: explain vectorization detail
-create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
TEXTFILE as
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-POSTHOOK: type: CREATETABLE_AS_SELECT
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-0
- Stage-2 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
- projectedColumns: [ctinyint:tinyint, csmallint:smallint,
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string,
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp,
cboolean1:boolean, cboolean2:boolean]
- Reduce Output Operator
- key expressions: ctinyint (type: tinyint), cdouble (type: double)
- sort order: ++
- Map-reduce partition columns: ctinyint (type: tinyint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS
true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez,
spark] IS false, No PTF TopN IS false
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats:
COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.8
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [0, 5]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int,
cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string,
ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean,
cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS
false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1
(type: double)
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE
Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: tinyint, _col5: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col5 ASC NULLS FIRST
- partition by: _col0
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col5
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats:
COMPLETE Column stats: NONE
- Filter Operator
- predicate: (rank_window_0 < 5) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: tinyint), _col5 (type: double),
rank_window_0 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4096 Data size: 880654 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 4096 Data size: 880654 Basic stats:
COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.sD
-
- Stage: Stage-0
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-3
- Create Table Operator:
- Create Table
- columns: ctinyint tinyint, cdouble double, r int
- field delimiter: ,
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
- serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.sD
-
- Stage: Stage-2
- Stats Work
- Basic Stats Work:
-
-PREHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE as
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-PREHOOK: type: CREATETABLE_AS_SELECT
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: database:default
-PREHOOK: Output: default@sD
-POSTHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE as
-select * from (select ctinyint, cdouble, rank() over(partition by ctinyint
order by cdouble) r from alltypesorc) a where r < 5
-POSTHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@sD
-POSTHOOK: Lineage: sd.cdouble SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null),
]
-POSTHOOK: Lineage: sd.ctinyint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint,
comment:null), ]
-POSTHOOK: Lineage: sd.r SCRIPT
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint,
type:smallint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cint,
type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint,
type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat,
type:float, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble,
type:double, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1,
type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2,
type:string, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2,
type:timestamp, comment:null),
(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean,
comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2,
type:boolean, comment:null
), ]
-a.ctinyint a.cdouble a.r
-PREHOOK: query: select * from sD
-where ctinyint is null
-PREHOOK: type: QUERY
-PREHOOK: Input: default@sd
-#### A masked pattern was here ####
-POSTHOOK: query: select * from sD
-where ctinyint is null
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@sd
-#### A masked pattern was here ####
-sd.ctinyint sd.cdouble sd.r
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1
-NULL NULL 1