http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out new file mode 100644 index 0000000..5ba7587 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out @@ -0,0 +1,608 @@ +PREHOOK: query: explain vectorization expression +select cdouble / 0.0 from alltypesparquet limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select cdouble / 0.0 from alltypesparquet limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: (cdouble / 0.0) (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select cdouble / 0.0 from alltypesparquet limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select cdouble / 0.0 from alltypesparquet limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL 
+NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization expression +select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000)) + predicate: ((cbigint < 100000000) and (cbigint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 16, 18] + selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21) + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(22,21)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select (cbigint - 988888L) as s1, cdouble / (cbigint - 988888L) as s2, 1.2 / (cbigint - 988888L) +from alltypesparquet where cbigint > 0 and cbigint < 100000000 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-985319 NULL -0.000001217879691754650 +-985319 2.0297994862577501E-4 -0.000001217879691754650 +-63925 0.11256941728588189 -0.000018771998435666797 +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +0 NULL NULL +392309 NULL 0.000003058813333367320 +673083 -0.010691103474608629 0.000001782841046349410 +2331159 NULL 0.000000514765402102559 +2342037 NULL 0.000000512374484263058 +3533105 -5.660743170667161E-5 0.000000339644590240030 +3768727 0.004139594085748318 0.000000318409903397089 +4728619 NULL 0.000000253773881972728 +5391403 NULL 0.000000222576572369010 +7022666 -0.0010246820794268159 0.000000170875277280736 +7470430 NULL 0.000000160633323650714 +8276429 NULL 0.000000144990067576246 +8286860 -8.683626850218298E-4 0.000000144807562816314 +8299981 -8.669899364829872E-4 0.000000144578644216174 +9247593 NULL 0.000000129763496295739 +9821695 -7.326637611939691E-4 0.000000122178503812224 +10000738 0.001559984873116364 0.000000119991144653525 +10081828 0.0015474376273826532 0.000000119026033770860 +10745355 -6.696847149303117E-4 0.000000111676161466978 +11127199 -1.797397530142132E-5 0.000000107843851808528 +11722580 NULL 0.000000102366543883684 +12649396 NULL 0.000000094866189658384 +13126214 -1.5236685917203544E-5 0.000000091420115503221 +14042667 NULL 0.000000085453852889910 +14943972 -1.3383322720358416E-5 0.000000080299936322150 +16259022 NULL 0.000000073805177211766 +16531556 -1.2098074736582569E-5 0.000000072588448419495 +16596157 NULL 0.000000072305895876979 +17058489 -1.1724367849930905E-5 0.000000070346207099585 +17247320 -4.172242412154468E-4 0.000000069576026884177 +19004427 8.209139901981786E-4 
0.000000063143182375349 +19498517 NULL 0.000000061543141973310 +20165679 7.736411950224934E-4 0.000000059507046601307 +20547875 NULL 0.000000058400199534015 +23264783 NULL 0.000000051580107151655 +23475527 6.645644206411213E-4 0.000000051117063314489 +24379905 NULL 0.000000049220864478348 +24514624 -2.935390728407664E-4 0.000000048950373458716 +25154198 -2.860755091456305E-4 0.000000047705754721339 +25245192 -7.922300610745999E-6 0.000000047533803664476 +26610943 NULL 0.000000045094230595286 +27520143 5.668938566198584E-4 0.000000043604424584567 +27818379 NULL 0.000000043136949137115 +28400244 NULL 0.000000042253158106670 +28698999 5.43607810153936E-4 0.000000041813305056389 +28806400 -6.9429015774272385E-6 0.000000041657409464563 +29920877 5.214085135271938E-4 0.000000040105776311303 +33126539 NULL 0.000000036224732079617 +34603086 NULL 0.000000034678987879867 +35156265 NULL 0.000000034133318769784 +35862260 NULL 0.000000033461360215447 +36123797 -1.992038655294182E-4 0.000000033219099310075 +36341671 -1.980096072082101E-4 0.000000033019945615599 +36413215 -5.4925114412446145E-6 0.000000032955068647468 +36578596 4.2650625518814335E-4 0.000000032806070522772 +36796441 -1.955623914823719E-4 0.000000032611849607955 +39723587 NULL 0.000000030208752296211 +39985709 -1.7996429674411925E-4 0.000000030010722080731 +40018606 NULL 0.000000029986051987918 +41003161 NULL 0.000000029266036342905 +41158231 3.790493328053871E-4 0.000000029155772025285 +41848817 NULL 0.000000028674645689507 +44047567 -1.633688416888043E-4 0.000000027243275434487 +45125678 NULL 0.000000026592398234992 +45180154 NULL 0.000000026560334433566 +45717793 3.4124569399052136E-4 0.000000026247986205283 +46163162 NULL 0.000000025994753132379 +46525838 3.353190543284787E-4 0.000000025792120068853 +48626663 NULL 0.000000024677819244969 +49102701 -1.465499830650864E-4 0.000000024438574163161 +50300445 -1.4306036457530346E-4 0.000000023856647789100 +50929325 -1.412938420055636E-4 0.000000023562063702984 +52422534 -1.3726921327381848E-4 0.000000022890919389742 +52667422 2.9621727070673783E-4 0.000000022784483356713 +52962061 2.945693522010029E-4 0.000000022657728520044 +53695172 NULL 0.000000022348377988248 +54760317 NULL 0.000000021913678841560 +55020655 2.835480602693661E-4 0.000000021809991175132 +56102034 NULL 0.000000021389598815615 +56131313 NULL 0.000000021378441655195 +56838351 -3.5187509222426247E-6 0.000000021112505533456 +56997841 -3.5089048372902406E-6 0.000000021053429023741 +57778807 -1.2454393528755274E-4 0.000000020768860803928 +58080381 NULL 0.000000020661021490200 +58307527 NULL 0.000000020580533281749 +58536385 -1.2293208745295768E-4 0.000000020500070170032 +59347745 NULL 0.000000020219807846111 +60229567 NULL 0.000000019923769334088 +60330397 NULL 0.000000019890470801974 +PREHOOK: query: explain vectorization expression +select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0)) + predicate: ((cdouble < -199.0) and (cdouble >= -500.0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 16, 17, 15, 18] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 1, 3, 4] + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select (cdouble + 200.0) as s1, cbigint / (cdouble + 200.0) as s2, (cdouble + 200.0) / (cdouble + 200.0), cbigint / (cdouble + 200.0), 3 / (cdouble + 200.0), 1.2 / (cdouble + 200.0) +from alltypesparquet where cdouble >= -500 and cdouble < -199 order by s1, s2 limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-292.0 NULL 1.0 NULL -0.010273972602739725 -0.00410958904109589 +-290.0 NULL 1.0 NULL -0.010344827586206896 -0.004137931034482759 +-289.0 NULL 1.0 NULL -0.010380622837370242 -0.004152249134948096 +-281.0 NULL 1.0 NULL -0.010676156583629894 -0.004270462633451957 +-279.0 NULL 1.0 NULL -0.010752688172043012 -0.004301075268817204 +-274.0 6888911.518248175 1.0 6888911.518248175 -0.010948905109489052 -0.00437956204379562 +-273.0 6028764.868131869 1.0 6028764.868131869 -0.01098901098901099 -0.004395604395604396 +-257.0 6404096.53307393 1.0 6404096.53307393 -0.011673151750972763 -0.004669260700389105 +-250.0 6583411.236 1.0 6583411.236 -0.012 -0.0048 +-247.0 NULL 1.0 NULL -0.012145748987854251 -0.004858299595141701 +-247.0 -7546669.174089069 1.0 -7546669.174089069 -0.012145748987854251 -0.004858299595141701 +-246.0 NULL 1.0 NULL -0.012195121951219513 -0.004878048780487805 +-237.0 NULL 1.0 NULL -0.012658227848101266 -0.005063291139240506 +-236.0 NULL 1.0 NULL -0.012711864406779662 -0.005084745762711864 +-229.0 7187130.170305677 1.0 7187130.170305677 -0.013100436681222707 -0.005240174672489083 +-228.0 8278779.631578947 1.0 8278779.631578947 -0.013157894736842105 -0.005263157894736842 +-225.0 NULL 1.0 NULL -0.013333333333333334 -0.005333333333333333 +-210.0 -8876320.40952381 1.0 -8876320.40952381 -0.014285714285714285 -0.005714285714285714 +-201.0 NULL 1.0 NULL -0.014925373134328358 -0.005970149253731343 +-199.0 NULL 1.0 NULL -0.01507537688442211 -0.006030150753768844 +-189.0 NULL 1.0 NULL -0.015873015873015872 -0.006349206349206349 +-188.0 NULL 1.0 NULL -0.015957446808510637 -0.006382978723404255 +-184.0 8944852.222826088 1.0 
8944852.222826088 -0.016304347826086956 -0.006521739130434782 +-183.0 8993731.196721312 1.0 8993731.196721312 -0.01639344262295082 -0.006557377049180328 +-181.0 NULL 1.0 NULL -0.016574585635359115 -0.0066298342541436465 +-179.0 NULL 1.0 NULL -0.01675977653631285 -0.0067039106145251395 +-169.0 9738774.01775148 1.0 9738774.01775148 -0.01775147928994083 -0.007100591715976331 +-164.0 NULL 1.0 NULL -0.018292682926829267 -0.007317073170731707 +-161.0 NULL 1.0 NULL -0.018633540372670808 -0.007453416149068323 +-154.0 1.2256894519480519E7 1.0 1.2256894519480519E7 -0.01948051948051948 -0.007792207792207792 +-152.0 NULL 1.0 NULL -0.019736842105263157 -0.007894736842105263 +-148.0 NULL 1.0 NULL -0.02027027027027027 -0.008108108108108109 +-140.0 NULL 1.0 NULL -0.02142857142857143 -0.008571428571428572 +-138.0 NULL 1.0 NULL -0.021739130434782608 -0.008695652173913044 +-137.0 NULL 1.0 NULL -0.021897810218978103 -0.00875912408759124 +-132.0 NULL 1.0 NULL -0.022727272727272728 -0.00909090909090909 +-129.0 1.2758548906976745E7 1.0 1.2758548906976745E7 -0.023255813953488372 -0.009302325581395349 +-128.0 NULL 1.0 NULL -0.0234375 -0.009375 +-126.0 NULL 1.0 NULL -0.023809523809523808 -0.009523809523809523 +-126.0 -1.4793867349206349E7 1.0 -1.4793867349206349E7 -0.023809523809523808 -0.009523809523809523 +-116.0 NULL 1.0 NULL -0.02586206896551724 -0.010344827586206896 +-113.0 NULL 1.0 NULL -0.02654867256637168 -0.010619469026548672 +-113.0 -1.6495816690265486E7 1.0 -1.6495816690265486E7 -0.02654867256637168 -0.010619469026548672 +-96.0 NULL 1.0 NULL -0.03125 -0.012499999999999999 +-94.0 -1.9830077510638297E7 1.0 -1.9830077510638297E7 -0.031914893617021274 -0.01276595744680851 +-93.0 NULL 1.0 NULL -0.03225806451612903 -0.012903225806451613 +-77.0 2.4513789038961038E7 1.0 2.4513789038961038E7 -0.03896103896103896 -0.015584415584415584 +-69.0 2.735596747826087E7 1.0 2.735596747826087E7 -0.043478260869565216 -0.017391304347826087 +-62.0 NULL 1.0 NULL -0.04838709677419355 -0.01935483870967742 +-62.0 3.0444544451612905E7 1.0 3.0444544451612905E7 -0.04838709677419355 -0.01935483870967742 +-60.0 NULL 1.0 NULL -0.05 -0.02 +-57.0 -3.27022330877193E7 1.0 -3.27022330877193E7 -0.05263157894736842 -0.021052631578947368 +-49.0 3.35888328367347E7 1.0 3.35888328367347E7 -0.061224489795918366 -0.024489795918367346 +-46.0 3.577940889130435E7 1.0 3.577940889130435E7 -0.06521739130434782 -0.02608695652173913 +-38.0 4.3311916026315786E7 1.0 4.3311916026315786E7 -0.07894736842105263 -0.031578947368421054 +-28.0 5.878045746428572E7 1.0 5.878045746428572E7 -0.10714285714285714 -0.04285714285714286 +-28.0 6.741291985714285E7 1.0 6.741291985714285E7 -0.10714285714285714 -0.04285714285714286 +-21.0 8.988389314285715E7 1.0 8.988389314285715E7 -0.14285714285714285 -0.05714285714285714 +-20.0 NULL 1.0 NULL -0.15 -0.06 +-17.0 NULL 1.0 NULL -0.17647058823529413 -0.07058823529411765 +-12.0 -1.5533560716666666E8 1.0 -1.5533560716666666E8 -0.25 -0.09999999999999999 +-3.0 NULL 1.0 NULL -1.0 -0.39999999999999997 +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 
NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL +0.0 NULL NULL NULL NULL NULL
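A note on what this div0 golden file pins down: under vectorization, Hive division returns NULL on a zero divisor rather than the IEEE-754 Infinity/NaN, and the double (DoubleColDivideDoubleScalar, DoubleColDivideDoubleColumn) and decimal (DecimalScalarDivideDecimalColumn) expression classes all keep that row-level semantics. That is why every `cdouble / 0.0` row above, and every row where `(cbigint - 988888L)` or `(cdouble + 200.0)` is zero, prints NULL. The decimal(22,21) result type for `1.2 / (cbigint - 988888L)` is consistent with Hive's usual decimal-division typing rule, scale = max(6, s1 + p2 + 1) and precision = (p1 - s1 + s2) + scale: with 1.2 read as decimal(2,1) and the subtraction cast to decimal(19,0), scale = max(6, 1 + 19 + 1) = 21 and precision = (2 - 1 + 0) + 21 = 22. A minimal HiveQL sketch of the same semantics (table t and column d are hypothetical; any DOUBLE column behaves the same):

    -- Hive yields NULL on division by zero, not Infinity or NaN,
    -- in both row-mode and vectorized execution.
    SELECT d / 0.0,                                              -- NULL for every row
           CAST(1.2 AS DECIMAL(2,1)) / CAST(0 AS DECIMAL(19,0))  -- NULL, typed decimal(22,21)
    FROM t;
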
http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out new file mode 100644 index 0000000..ca696ab --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -0,0 +1,932 @@ +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-1887561756 -10011.0 +-1887561756 -13877.0 +-1887561756 -2281.0 +-1887561756 -8881.0 +-1887561756 10361.0 +-1887561756 1839.0 +-1887561756 9531.0 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 1] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 5] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: smallint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 5] + dataColumns: ctinyint:tinyint, 
csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-64 -10462.0 -10462 +-64 -15920.0 -15920 +-64 -1600.0 -1600 +-64 -200.0 -200 +-64 -2919.0 -2919 +-64 -3097.0 -3097 +-64 -3586.0 -3586 +-64 -4018.0 -4018 +-64 -4040.0 -4040 +-64 -4803.0 -4803 +-64 -6907.0 -6907 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -7196.0 -7196 +-64 -8080.0 -8080 +-64 -9842.0 -9842 +PREHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: 
+ native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: ctinyint (type: tinyint), (cdouble + 1.0) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 13] + selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDouble(col 13:double) -> struct<count:bigint,sum:double,input:double> + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [1] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [double] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, VALUE._col0:struct<count:bigint,sum:double,input:double> + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 1:struct<count:bigint,sum:double,input:double>) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + 
vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-46 3033.55 +-47 -574.6428571428571 +-48 1672.909090909091 +-49 768.7659574468086 +-50 -960.0192307692307 +-51 -96.46341463414635 +-52 2810.705882352941 +-53 -532.7567567567568 +-54 2712.7272727272725 +-55 2385.595744680851 +-56 2595.818181818182 +-57 1867.0535714285713 +-58 3483.2444444444445 +-59 318.27272727272725 +-60 1071.82 +-61 914.3404255319149 +-62 245.69387755102042 +-63 2178.7272727272725 +-64 373.52941176470586 +NULL 9370.0945309795 +PREHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesparquet limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select distinct(ctinyint) from alltypesparquet limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select distinct(ctinyint) from alltypesparquet limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select distinct(ctinyint) from alltypesparquet limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-46 +-47 +-48 +-49 +-50 +-51 +-52 +-53 +-54 +-55 +-56 +-57 +-58 +-59 +-60 +-61 +-62 +-63 +-64 +NULL +PREHOOK: query: explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: 
explain vectorization detail +select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: ctinyint (type: tinyint), cdouble (type: double) + outputColumnNames: ctinyint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint, col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint), cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, 
KEY._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint, col 1:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:double) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] + keys: _col0 (type: tinyint) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-46 24 +-47 22 +-48 29 +-49 26 +-50 30 +-51 21 +-52 33 +-53 22 +-54 26 +-55 29 +-56 36 +-57 35 +-58 23 +-59 31 +-60 27 +-61 25 +-62 27 +-63 19 +-64 24 +NULL 2932 +PREHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +PREHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by 
cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:tinyint) + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:double + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:double, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator 
Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint), _col0 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-10462.0 -64 +-1121.0 -89 +-11322.0 -101 +-11492.0 -78 +-15920.0 -64 +-4803.0 -64 +-6907.0 -64 +-7196.0 -2009 +-8080.0 -64 +-8118.0 -80 +-9842.0 -64 +10496.0 -67 +15601.0 -1733 +3520.0 -86 +4811.0 -115 +5241.0 -80 +557.0 -75 +7705.0 -88 +9452.0 -76 +NULL -32768 http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out
new file mode 100644
index 0000000..acac581
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_nested_udf.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesparquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesparquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+261468

http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out
new file mode 100644
index 0000000..e581007
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out
@@ -0,0 +1,58 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT AVG(cbigint),
+       (-(AVG(cbigint))),
+       (-6432 + AVG(cbigint)),
+       STDDEV_POP(cbigint),
+       (-((-6432 + AVG(cbigint)))),
+       ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+       VAR_SAMP(cbigint),
+       (-((-6432 + AVG(cbigint)))),
+       (-6432 + (-((-6432 + AVG(cbigint))))),
+       (-((-6432 + AVG(cbigint)))),
+       ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+       COUNT(*),
+       SUM(cfloat),
+       (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+       (-(VAR_SAMP(cbigint))),
+       ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+       MIN(ctinyint),
+       (-(MIN(ctinyint)))
+FROM alltypesparquet
+WHERE (((cstring2 LIKE '%b%')
+        OR ((79.553 != cint)
+            OR (NOT(cbigint >= cdouble))))
+       OR ((ctinyint >= csmallint)
+           AND (NOT ((cboolean2 != 1)
+                     OR (3569 != ctinyint)))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT AVG(cbigint),
+       (-(AVG(cbigint))),
+       (-6432 + AVG(cbigint)),
+       STDDEV_POP(cbigint),
+       (-((-6432 + AVG(cbigint)))),
+       ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+       VAR_SAMP(cbigint),
+       (-((-6432 + AVG(cbigint)))),
+       (-6432 + (-((-6432 + AVG(cbigint))))),
+       (-((-6432 + AVG(cbigint)))),
+       ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+       COUNT(*),
+       SUM(cfloat),
+       (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+       (-(VAR_SAMP(cbigint))),
+       ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+       MIN(ctinyint),
+       (-(MIN(ctinyint)))
+FROM alltypesparquet
+WHERE (((cstring2 LIKE '%b%')
+        OR ((79.553 != cint)
+            OR (NOT(cbigint >= cdouble))))
+       OR ((ctinyint >= csmallint)
+           AND (NOT ((cboolean2 != 1)
+                     OR (3569 != ctinyint)))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64
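The WARNING lines in these files flag a real hazard: to evaluate cbigint >= cdouble, Hive casts the bigint to double (visible as UDFToDouble in the next plan), and a double's 53-bit significand cannot represent every 64-bit long exactly. A self-contained demonstration of the precision loss being warned about:

    // Why comparing a 64-bit long via double can lose precision: above 2^53,
    // adjacent long values collapse to the same double.
    public class PrecisionLoss {
        public static void main(String[] args) {
            long a = (1L << 53) + 1;  // 9007199254740993
            long b = 1L << 53;        // 9007199254740992
            System.out.println(a == b);                   // false
            System.out.println((double) a == (double) b); // true: both round to 2^53
        }
    }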

http://git-wip-us.apache.org/repos/asf/hive/blob/9a59592e/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
new file mode 100644
index 0000000..e90cff4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
@@ -0,0 +1,184 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesparquet
+                  Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+                    Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cbigint (type: bigint), cdouble (type: double)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+                      Limit
+                        Number of rows: 2
+                        Offset of rows: 3
+                        Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 2
+      Processor Tree:
+        ListSink
+
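The LIMIT 3,2 form above is MySQL-style offset syntax: skip the first 3 qualifying rows, then emit at most 2, which the plan records as "Offset of rows: 3" and "Number of rows: 2". A minimal sketch of that skip-then-take behavior (hypothetical helper, not the VectorLimitOperator implementation):

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical skip-then-take logic corresponding to "Offset of rows"
    // and "Number of rows" in the Limit operator above.
    public class OffsetLimit {
        static <T> List<T> apply(Iterable<T> rows, long offset, long limit) {
            List<T> out = new ArrayList<>();
            long seen = 0;
            for (T row : rows) {
                if (seen++ < offset) continue;   // skip the first `offset` rows
                if (out.size() >= limit) break;  // stop after `limit` rows
                out.add(row);
            }
            return out;
        }
    }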
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesparquet WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-1887561756 10361.0
+-1887561756 -8881.0
+PREHOOK: query: explain vectorization expression
+select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesparquet
+                  Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+                    predicate: ctinyint is not null (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 5, 1]
+                      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint), _col1 (type: double)
+                        sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col2 (type: smallint)
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2
+            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 3
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Offset of rows: 10
+                  Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesparquet
+#### A masked pattern was here ####
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
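The three identical result rows are expected rather than a bug: sorted by (ctinyint, cdouble), positions 11 through 13 of this dataset all fall inside a long run of (-64, -7196.0) duplicates, so an offset of 10 with a limit of 3 returns the same tuple three times. At the batch level, a vectorized limit can be enforced by truncating the batch's row count instead of copying rows; a rough sketch under that assumption (hypothetical Batch type, not Hive's actual VectorizedRowBatch API):

    // Rough sketch of batch-level limiting: shrink the final batch's row
    // count rather than copying surviving rows.
    public class BatchLimit {
        static class Batch { int size; /* column vectors omitted */ }

        private long remaining;

        BatchLimit(long limit) { this.remaining = limit; }

        // Returns false once the limit is exhausted and processing can stop.
        boolean forward(Batch batch) {
            if (remaining <= 0) return false;
            if (batch.size > remaining) {
                batch.size = (int) remaining; // truncate the final batch
            }
            remaining -= batch.size;
            return true;
        }
    }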