http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out new file mode 100644 index 0000000..f01e808 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out @@ -0,0 +1,480 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > -15) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > -15) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 0.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5:double), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -15.0)(children: CastTimestampToDouble(col 9:timestamp) -> 
13:double), FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5:double)))) + predicate: (((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble))) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0)) (type: boolean) + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] + selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 14:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 15:int, LongColUnaryMinus(col 1:smallint) -> 16:smallint, LongColUnaryMinus(col 0:tinyint) -> 17:tinyint, LongColAddLongScalar(col 18:int, val 17)(children: col 18:tinyint) -> 19:int, LongColMultiplyLongColumn(col 3:bigint, col 18:bigint)(children: col 18:smallint) -> 20:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColUnaryMinus(col 0:tinyint) -> 21:tinyint, LongColModuloLongColumn(col 22:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 22:tinyint) -> 23:tinyint + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) + sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, 
cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaaaaaaaaaaaaa + reduceColumnSortOrder: +++++++++++++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 15 + dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:smallint, KEY.reducesinkkey3:tinyint, KEY.reducesinkkey4:timestamp, KEY.reducesinkkey5:string, KEY.reducesinkkey6:bigint, KEY.reducesinkkey7:int, KEY.reducesinkkey8:smallint, KEY.reducesinkkey9:tinyint, KEY.reducesinkkey10:int, KEY.reducesinkkey11:bigint, KEY.reducesinkkey12:int, KEY.reducesinkkey13:tinyint, KEY.reducesinkkey14:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 25 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > -15) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was 
here #### +POSTHOOK: query: SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > -15) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +NULL -2118149242 -7196 56 1969-12-31 15:59:50.462 NULL -4236298484 0 7196 -56 -39 -15242201945432 NULL -56 0 +NULL -2121399625 -7196 27 1969-12-31 15:59:50.046 NULL -4242799250 0 7196 -27 -10 -15265591701500 NULL -27 0 +NULL -2124802690 -7196 -6 1969-12-31 15:59:57.92 NULL -4249605380 0 7196 6 23 -15290080157240 NULL 6 0 +NULL -2128720310 -7196 -52 1969-12-31 15:59:45.978 NULL -4257440620 0 7196 52 69 -15318271350760 NULL 52 0 +NULL -2132232110 -200 60 1969-12-31 15:59:47.019 NULL -4264464220 -200 200 -60 -43 -426446422000 NULL -60 0 +NULL -2132536965 -7196 9 1969-12-31 15:59:46 NULL -4265073930 0 7196 -9 8 -15345736000140 NULL -9 0 +NULL -2135141157 -7196 50 1969-12-31 15:59:50.192 NULL -4270282314 0 7196 -50 -33 -15364475765772 NULL -50 0 +NULL -2137537679 -7196 -25 1969-12-31 15:59:50.136 NULL -4275075358 0 7196 25 42 -15381721138084 NULL 25 0 +NULL -2145481991 -7196 56 1969-12-31 15:59:55.667 NULL -4290963982 0 7196 -56 -39 -15438888407236 NULL -56 0 +NULL NULL -200 -36 1969-12-31 15:59:57.241 NULL NULL -200 200 36 53 NULL NULL 36 0 +NULL NULL -200 -43 1969-12-31 15:59:53.783 NULL NULL -200 200 43 60 NULL NULL 43 0 +NULL NULL -200 -58 1969-12-31 15:59:51.115 NULL NULL -200 200 58 75 NULL NULL 58 0 +NULL NULL -200 22 1969-12-31 15:59:50.109 NULL NULL -200 200 -22 -5 NULL NULL -22 0 +NULL NULL -200 3 1969-12-31 15:59:50.489 NULL NULL -200 200 -3 14 NULL NULL -3 0 +NULL NULL -200 43 1969-12-31 15:59:57.003 NULL NULL -200 200 -43 -26 NULL NULL -43 0 +NULL NULL -200 53 1969-12-31 15:59:49.46 NULL NULL -200 200 -53 -36 NULL NULL -53 0 +NULL NULL -200 9 1969-12-31 15:59:44.108 NULL NULL -200 200 -9 8 NULL NULL -9 0 +NULL NULL -7196 -38 1969-12-31 15:59:53.503 NULL NULL 0 7196 38 55 NULL NULL 38 0 +NULL NULL -7196 -49 1969-12-31 15:59:51.009 NULL NULL 0 7196 49 66 NULL NULL 49 0 +NULL NULL -7196 -49 1969-12-31 15:59:52.052 NULL NULL 0 7196 49 66 NULL NULL 49 0 +NULL NULL -7196 -50 1969-12-31 15:59:52.424 NULL NULL 0 7196 50 67 NULL NULL 50 0 +NULL NULL -7196 -61 1969-12-31 15:59:44.823 NULL NULL 0 7196 61 78 NULL NULL 61 0 +NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0 +NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0 +NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0 +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 
0.0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > 7.6850000000000005) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0.0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > 7.6850000000000005) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 0.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5:double), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val 7.6850000000000005)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5:double)))) + predicate: (((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble))) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0)) (type: boolean) + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 
21, 23] + selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 14:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 15:int, LongColUnaryMinus(col 1:smallint) -> 16:smallint, LongColUnaryMinus(col 0:tinyint) -> 17:tinyint, LongColAddLongScalar(col 18:int, val 17)(children: col 18:tinyint) -> 19:int, LongColMultiplyLongColumn(col 3:bigint, col 18:bigint)(children: col 18:smallint) -> 20:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColUnaryMinus(col 0:tinyint) -> 21:tinyint, LongColModuloLongColumn(col 22:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 22:tinyint) -> 23:tinyint + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) + sort order: +++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] + Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 25 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + 
Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 25 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0.0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > 7.6850000000000005) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT cboolean1, + cbigint, + csmallint, + ctinyint, + ctimestamp1, + cstring1, + (cbigint + cbigint) as c1, + (csmallint % -257) as c2, + (-(csmallint)) as c3, + (-(ctinyint)) as c4, + ((-(ctinyint)) + 17) as c5, + (cbigint * (-(csmallint))) as c6, + (cint % csmallint) as c7, + (-(ctinyint)) as c8, + ((-(ctinyint)) % ctinyint) as c9 +FROM alltypesparquet +WHERE ((ctinyint != 0) + AND (((ctimestamp1 <= 0.0) + OR ((ctinyint = cint) + OR (cstring2 LIKE 'ss'))) + AND ((988888 < cdouble) + OR ((ctimestamp2 > 7.6850000000000005) + AND (3569 >= cdouble))))) +ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +true NULL -14584 -7 1969-12-31 15:59:46.965 1cGVWH7n1QU NULL -192 14584 7 24 NULL 5864 7 0 +true NULL -14739 4 1969-12-31 15:59:55.188 cvLH6Eat2yFsyy7p NULL -90 14739 -4 13 NULL 8966 -4 0 +true NULL -14771 13 1969-12-31 15:59:58.839 821UdmGbkEf4j NULL -122 14771 -13 4 NULL 12897 -13 0 +true NULL -15149 20 1969-12-31 15:59:46.575 1cGVWH7n1QU NULL -243 15149 -20 -3 NULL 10520 -20 0 +true NULL -15344 48 1969-12-31 15:59:54.853 821UdmGbkEf4j NULL -181 15344 -48 -31 NULL 13871 -48 0 +true NULL -15388 49 1969-12-31 15:59:58.545 cvLH6Eat2yFsyy7p NULL -225 15388 -49 -32 NULL 3131 -49 0 +true NULL -15431 -11 1969-12-31 15:59:52.176 cvLH6Eat2yFsyy7p NULL -11 15431 11 28 NULL 7586 11 0 +true NULL -15435 -21 1969-12-31 15:59:54.705 1cGVWH7n1QU NULL -15 15435 21 38 NULL 6586 21 0 +true NULL -15450 -34 1969-12-31 15:59:55.681 cvLH6Eat2yFsyy7p NULL -30 15450 34 51 NULL 5717 34 0 +true NULL -15478 7 1969-12-31 15:59:56.809 cvLH6Eat2yFsyy7p NULL -58 15478 -7 10 NULL 7501 -7 0 +true NULL -15555 -2 1969-12-31 15:59:58.773 1cGVWH7n1QU NULL -135 15555 2 19 NULL 9991 2 0 +true NULL -15659 -11 1969-12-31 15:59:48.466 cvLH6Eat2yFsyy7p NULL -239 15659 11 28 NULL 12199 11 0 +true NULL -15711 -60 1969-12-31 15:59:53.115 821UdmGbkEf4j NULL -34 15711 60 77 NULL 7646 60 0 +true NULL -15770 -23 1969-12-31 15:59:56.822 1cGVWH7n1QU NULL -93 15770 23 40 NULL 4926 23 0 +true NULL -15794 53 1969-12-31 15:59:53.749 cvLH6Eat2yFsyy7p NULL -117 15794 -53 -36 NULL 4351 -53 0 +true NULL -15892 29 1969-12-31 15:59:57.937 821UdmGbkEf4j NULL -215 15892 -29 -12 NULL 171 -29 0 +true NULL -15899 50 1969-12-31 
15:59:46.926 821UdmGbkEf4j NULL -222 15899 -50 -33 NULL 10210 -50 0 +true NULL -15920 -64 1969-12-31 15:59:51.859 cvLH6Eat2yFsyy7p NULL -243 15920 64 81 NULL 6687 64 0 +true NULL -15922 -17 1969-12-31 15:59:46.164 821UdmGbkEf4j NULL -245 15922 17 34 NULL 10851 17 0 +true NULL -15980 -6 1969-12-31 15:59:54.84 1cGVWH7n1QU NULL -46 15980 6 23 NULL 14836 6 0 +true NULL -16017 -21 1969-12-31 15:59:44.02 821UdmGbkEf4j NULL -83 16017 21 38 NULL 2282 21 0 +true NULL -16036 -15 1969-12-31 15:59:58.681 1cGVWH7n1QU NULL -102 16036 15 32 NULL 7928 15 0 +true NULL -16076 59 1969-12-31 15:59:55.023 821UdmGbkEf4j NULL -142 16076 -59 -42 NULL 7907 -59 0 +true NULL -16122 50 1969-12-31 15:59:51.608 1cGVWH7n1QU NULL -188 16122 -50 -33 NULL 1828 -50 0 +true NULL -16153 35 1969-12-31 15:59:52.036 1cGVWH7n1QU NULL -219 16153 -35 -18 NULL 14817 -35 0
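
Before the next golden file, a minimal sketch of how a comparable vectorized Spark plan could be inspected interactively. It only restates settings that the plan above names in its enabledConditionsMet/nativeConditionsMet entries and abbreviates the test query; the actual .q harness setup may differ, so treat it as an illustration rather than the test's own script.

-- Assumed session setup, derived from the conditions reported in the plan output.
SET hive.execution.engine=spark;
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
SET hive.vectorized.execution.reducesink.new.enabled=true;
SET hive.vectorized.use.vectorized.input.format=true;

-- Abbreviated form of the query under test (the full projection list is in the q.out above).
EXPLAIN VECTORIZATION DETAIL
SELECT cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1,
       (cbigint + cbigint) AS c1
FROM alltypesparquet
WHERE (ctinyint != 0)
  AND ((ctimestamp1 <= 0) OR (ctinyint = cint) OR (cstring2 LIKE 'ss'))
  AND ((988888 < cdouble) OR ((ctimestamp2 > -15) AND (3569 >= cdouble)))
ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1
LIMIT 25;
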
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out new file mode 100644 index 0000000..b4f17c4 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out @@ -0,0 +1,454 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 10) + AND (ctimestamp2 != 16))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 10) + AND (ctimestamp2 != 16))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0))) + predicate: ((cboolean1 is not null and (cdouble = 988888.0)) or (cfloat < -6432) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and 
(UDFToDouble(ctimestamp2) <> 16.0))) (type: boolean) + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 15:double, DoubleColAddDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 18:float, DoubleColUnaryMinus(col 4:float) -> 20:float, DoubleColAddDoubleColumn(col 21:double, col 23:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 21:double, col 23:float) -> 22:double + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) + sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: 
[double, double, double, double, double, double, double, double, double, double, double] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaaaaaaaaaaaa + reduceColumnSortOrder: ++++++++++++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:double, KEY.reducesinkkey2:boolean, KEY.reducesinkkey3:string, KEY.reducesinkkey4:float, KEY.reducesinkkey5:double, KEY.reducesinkkey6:double, KEY.reducesinkkey7:double, KEY.reducesinkkey8:float, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:float, KEY.reducesinkkey13:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 10) + AND (ctimestamp2 != 16))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + 
((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 10) + AND (ctimestamp2 != 16))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +1969-12-31 15:59:30.929 -200.0 NULL NULL 12.0 200.0 -5438.15 51400.0 NULL 2.128824065E9 200.0 -13.389 -12.0 NULL +1969-12-31 15:59:30.929 -200.0 NULL NULL 42.0 200.0 -5438.15 51400.0 NULL -5.78935585E8 200.0 -43.389 -42.0 NULL +1969-12-31 15:59:30.929 -7196.0 NULL NULL 49.0 7196.0 1557.8500000000004 1849372.0 NULL 1.948230809E9 7196.0 -50.389 -49.0 NULL +1969-12-31 15:59:30.929 15601.0 NULL NULL -54.0 -15601.0 -21239.15 -4009457.0 NULL -1.114185408E9 -15601.0 52.611 54.0 NULL +1969-12-31 15:59:43.628 -200.0 NULL NULL 23.0 200.0 -5438.15 51400.0 NULL 2.034056161E9 200.0 -24.389 -23.0 NULL +1969-12-31 15:59:43.637 -7196.0 NULL NULL -44.0 7196.0 1557.8500000000004 1849372.0 NULL 1.100301451E9 7196.0 42.611 44.0 NULL +1969-12-31 15:59:43.64 -7196.0 NULL NULL -51.0 7196.0 1557.8500000000004 1849372.0 NULL -1.339157623E9 7196.0 49.611 51.0 NULL +1969-12-31 15:59:43.661 -200.0 NULL NULL 13.0 200.0 -5438.15 51400.0 NULL -6.8063459E7 200.0 -14.389 -13.0 NULL +1969-12-31 15:59:43.676 -200.0 NULL NULL -10.0 200.0 -5438.15 51400.0 NULL 8.34296111E8 200.0 8.611 10.0 NULL +1969-12-31 15:59:43.705 15601.0 NULL NULL -12.0 -15601.0 -21239.15 -4009457.0 NULL 5.95398978E8 -15601.0 10.611 12.0 NULL +1969-12-31 15:59:43.709 15601.0 NULL NULL 61.0 -15601.0 -21239.15 -4009457.0 NULL -1.55652257E8 -15601.0 -62.389 -61.0 NULL +1969-12-31 15:59:43.72 -200.0 NULL NULL -18.0 200.0 -5438.15 51400.0 NULL 1.809037075E9 200.0 16.611 18.0 NULL +1969-12-31 15:59:43.721 15601.0 NULL NULL 57.0 -15601.0 -21239.15 -4009457.0 NULL -1.465672807E9 -15601.0 -58.389 -57.0 NULL +1969-12-31 15:59:43.749 -7196.0 NULL NULL -39.0 7196.0 1557.8500000000004 1849372.0 NULL 3.96436076E8 7196.0 37.611 39.0 NULL +1969-12-31 15:59:43.771 15601.0 NULL NULL 49.0 -15601.0 -21239.15 -4009457.0 NULL -1.10733693E9 -15601.0 -50.389 -49.0 NULL +1969-12-31 15:59:43.773 -200.0 NULL NULL -24.0 200.0 -5438.15 51400.0 NULL -1.046049502E9 200.0 22.611 24.0 NULL +1969-12-31 15:59:43.782 -7196.0 NULL NULL 51.0 7196.0 1557.8500000000004 1849372.0 NULL -1.607339819E9 7196.0 -52.389 -51.0 NULL +1969-12-31 15:59:43.783 -200.0 NULL NULL -11.0 200.0 -5438.15 51400.0 NULL 1.2116287E7 200.0 9.611 11.0 NULL +1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL +1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 12.503) + AND (ctimestamp2 != 11.998))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 
+PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 12.503) + AND (ctimestamp2 != 11.998))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0))) + predicate: ((cboolean1 is not null and (cdouble = 988888.0)) or (cfloat < -6432) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998))) (type: boolean) + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 15:double, DoubleColAddDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 5:double) -> 
16:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 18:float, DoubleColUnaryMinus(col 4:float) -> 20:float, DoubleColAddDoubleColumn(col 21:double, col 23:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 21:double, col 23:float) -> 22:double + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) + sort order: ++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) 
as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 12.503) + AND (ctimestamp2 != 11.998))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctimestamp1, + cdouble, + cboolean1, + cstring1, + cfloat, + (-(cdouble)) as c1, + (-5638.15 - cdouble) as c2, + (cdouble * -257) as c3, + (cint + cfloat) as c4, + ((-(cdouble)) + cbigint) as c5, + (-(cdouble)) as c6, + (-1.389 - cfloat) as c7, + (-(cfloat)) as c8, + ((-5638.15 - cdouble) + (cint + cfloat)) as c9 +FROM alltypesparquet +WHERE (((cstring2 IS NOT NULL) + AND ((ctimestamp1 <= 12.503) + AND (ctimestamp2 != 11.998))) + OR ((cfloat < -6432) + OR ((cboolean1 IS NOT NULL) + AND (cdouble = 988888)))) +ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 +LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +1969-12-31 15:59:30.929 -200.0 NULL NULL 12.0 200.0 -5438.15 51400.0 NULL 2.128824065E9 200.0 -13.389 -12.0 NULL +1969-12-31 15:59:30.929 -200.0 NULL NULL 42.0 200.0 -5438.15 51400.0 NULL -5.78935585E8 200.0 -43.389 -42.0 NULL +1969-12-31 15:59:30.929 -7196.0 NULL NULL 49.0 7196.0 1557.8500000000004 1849372.0 NULL 1.948230809E9 7196.0 -50.389 -49.0 NULL +1969-12-31 15:59:30.929 15601.0 NULL NULL -54.0 -15601.0 -21239.15 -4009457.0 NULL -1.114185408E9 -15601.0 52.611 54.0 NULL +1969-12-31 15:59:43.628 -200.0 NULL NULL 23.0 200.0 -5438.15 51400.0 NULL 2.034056161E9 200.0 -24.389 -23.0 NULL +1969-12-31 15:59:43.637 -7196.0 NULL NULL -44.0 7196.0 1557.8500000000004 1849372.0 NULL 1.100301451E9 7196.0 42.611 44.0 NULL +1969-12-31 15:59:43.64 -7196.0 NULL NULL -51.0 7196.0 1557.8500000000004 1849372.0 NULL -1.339157623E9 7196.0 49.611 51.0 NULL +1969-12-31 15:59:43.661 -200.0 NULL NULL 13.0 200.0 -5438.15 51400.0 NULL -6.8063459E7 200.0 -14.389 -13.0 NULL +1969-12-31 15:59:43.676 -200.0 NULL NULL -10.0 200.0 -5438.15 51400.0 NULL 8.34296111E8 200.0 8.611 10.0 NULL +1969-12-31 15:59:43.705 15601.0 NULL NULL -12.0 -15601.0 -21239.15 -4009457.0 NULL 5.95398978E8 -15601.0 10.611 12.0 NULL +1969-12-31 15:59:43.709 15601.0 NULL NULL 61.0 -15601.0 -21239.15 -4009457.0 NULL -1.55652257E8 -15601.0 -62.389 -61.0 NULL +1969-12-31 15:59:43.72 -200.0 NULL NULL -18.0 200.0 -5438.15 51400.0 NULL 1.809037075E9 200.0 16.611 18.0 NULL +1969-12-31 15:59:43.721 15601.0 NULL NULL 57.0 -15601.0 -21239.15 -4009457.0 NULL -1.465672807E9 -15601.0 -58.389 -57.0 NULL +1969-12-31 15:59:43.749 -7196.0 NULL NULL -39.0 7196.0 1557.8500000000004 1849372.0 NULL 3.96436076E8 7196.0 37.611 39.0 NULL +1969-12-31 15:59:43.771 15601.0 NULL NULL 49.0 -15601.0 -21239.15 -4009457.0 NULL -1.10733693E9 -15601.0 -50.389 -49.0 NULL +1969-12-31 15:59:43.773 -200.0 NULL NULL -24.0 200.0 -5438.15 51400.0 NULL -1.046049502E9 200.0 22.611 24.0 NULL +1969-12-31 15:59:43.782 -7196.0 NULL NULL 51.0 7196.0 1557.8500000000004 1849372.0 NULL -1.607339819E9 7196.0 -52.389 -51.0 NULL +1969-12-31 15:59:43.783 -200.0 NULL NULL -11.0 200.0 -5438.15 51400.0 NULL 1.2116287E7 200.0 9.611 11.0 
NULL +1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL +1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL
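
As a hedged illustration of what the parquet_vectorization_8 plans show: the predicate (ctimestamp1 <= 12.503) is compiled as UDFToDouble(ctimestamp1) <= 12.503, which the vectorizer evaluates with CastTimestampToDouble feeding FilterDoubleColLessEqualDoubleScalar. The explicit-cast form below is a paraphrase for readability (column names and literals taken from the q.out), not part of the test itself, and assumes the cast spelling is equivalent for this data set.

-- Spelling out the implicit timestamp-to-double casts that the vectorized
-- filter in parquet_vectorization_8.q.out performs internally.
SELECT ctimestamp1, cdouble, cboolean1, cstring1, cfloat
FROM alltypesparquet
WHERE (cstring2 IS NOT NULL
       AND CAST(ctimestamp1 AS DOUBLE) <= 12.503
       AND CAST(ctimestamp2 AS DOUBLE) != 11.998)
   OR (cfloat < -6432)
   OR (cboolean1 IS NOT NULL AND cdouble = 988888)
ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat
LIMIT 20;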