http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_between_in.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 7d722d0..fbb43c4 100644 --- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -12,14 +12,10 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc. POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate +POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -36,65 +32,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -107,14 +65,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) +POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE)) POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -131,77 +85,31 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -214,14 +122,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -238,65 +142,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -309,14 +175,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) +POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568) POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -333,77 +195,31 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -416,14 +232,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -440,65 +252,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -511,14 +285,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate +POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -535,65 +305,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [3] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -606,14 +338,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 +POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -630,65 +358,27 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(20,10)) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(20,10)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -701,14 +391,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 +POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -725,77 +411,31 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Select Operator - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [] Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1055,16 +695,12 @@ POSTHOOK: Input: default@decimal_date_test 6172 PREHOOK: query: -- projections -EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY POSTHOOK: query: -- projections -EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 +EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1081,27 +717,12 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [4] - selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 4 - native: false - projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -1110,50 +731,20 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1166,14 +757,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1190,27 +777,12 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [4] - selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 4 - native: false - projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -1219,50 +791,20 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1275,14 +817,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1299,27 +837,12 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [4] - selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 4 - native: false - projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -1328,50 +851,20 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1384,14 +877,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 +POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1408,27 +897,12 @@ STAGE PLANS: TableScan alias: decimal_date_test Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [4] - selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:Long Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - Group By Vectorization: - aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 4 - native: false - projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -1437,50 +911,20 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index bf6c494..424a2c9 100644 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -97,24 +97,20 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT +PREHOOK: query: EXPLAIN SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT +POSTHOOK: query: EXPLAIN SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -132,27 +128,12 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) - Group By Vectorization: - aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct<count:bigint,sum:double>, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct<count:bigint,sum:double>, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0)> - className: VectorGroupByOperator - vectorOutput: false - keyExpressions: col 2 - native: false - projectedOutputColumns: [0, 1, 2] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0)> output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -165,20 +146,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:decimal(12,0),input:decimal(10,0)>) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:int> of Column[VALUE._col0] not supported - vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) @@ -194,33 +162,16 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_char_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 943a4b1..3e551bb 100644 --- a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -121,16 +121,12 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -146,23 +142,12 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -170,14 +155,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true Stage: Stage-0 Move Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 4322dc7..73272fb 100644 --- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1231,18 +1231,14 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ] PREHOOK: query: ------------------------------------------------------------------------------------------ -explain vectorization expression +explain select count(distinct ws_order_number) from web_sales PREHOOK: type: QUERY POSTHOOK: query: ------------------------------------------------------------------------------------------ -explain vectorization expression +explain select count(distinct ws_order_number) from web_sales POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1260,24 +1256,11 @@ STAGE PLANS: TableScan alias: web_sales Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] Select Operator expressions: ws_order_number (type: int) outputColumnNames: ws_order_number - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [16] Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 16 - native: false - projectedOutputColumns: [] keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 @@ -1286,88 +1269,35 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0) - Group By Vectorization: - aggregators: VectorUDAFCount(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat