This is an automated email from the ASF dual-hosted git repository. rameshkumar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new fdea1bd1ba3 HIVE-26269 Class cast exception when vectorization is enabled for certain case when cases (#3329) fdea1bd1ba3 is described below commit fdea1bd1ba3c4b2b27ef2bf0a463ca91d4d44653 Author: Ramesh Kumar <rameshkumarthangara...@gmail.com> AuthorDate: Wed Jun 15 10:31:44 2022 -0700 HIVE-26269 Class cast exception when vectorization is enabled for certain case when cases (#3329) --- .../hive/ql/exec/vector/VectorizationContext.java | 19 +- .../queries/clientpositive/vector_case_when_3.q | 9 + .../clientpositive/llap/vector_case_when_3.q.out | 288 +++++++++++++++++++++ 3 files changed, 312 insertions(+), 4 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 6a897939819..6d0e4899e68 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2407,7 +2407,7 @@ import com.google.common.annotations.VisibleForTesting; } else if (udf instanceof GenericUDFToString) { ve = getCastToString(childExpr, returnType); } else if (udf instanceof GenericUDFToDecimal) { - ve = getCastToDecimal(childExpr, returnType); + ve = getCastToDecimal(childExpr, mode, returnType); } else if (udf instanceof GenericUDFToChar) { ve = getCastToChar(childExpr, returnType); } else if (udf instanceof GenericUDFToVarchar) { @@ -3232,8 +3232,8 @@ import com.google.common.annotations.VisibleForTesting; return null; } - private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType) - throws HiveException { + private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, + TypeInfo returnType) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -3278,7 +3278,18 @@ import com.google.common.annotations.VisibleForTesting; int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - + // try to scale up the expression so we can match the return type scale + if (tryDecimal64Cast && ((DecimalTypeInfo)returnType).precision() <= 18) { + List<ExprNodeDesc> children = new ArrayList<>(); + int scaleDiff = ((DecimalTypeInfo)returnType).scale() - ((DecimalTypeInfo)childExpr.get(0).getTypeInfo()).scale(); + ExprNodeDesc newConstant = new ExprNodeConstantDesc(new DecimalTypeInfo(scaleDiff, 0), + HiveDecimal.create(POWEROFTENTABLE[scaleDiff])); + children.add(child); + children.add(newConstant); + ExprNodeGenericFuncDesc newScaledExpr = new ExprNodeGenericFuncDesc(returnType, + new GenericUDFOPScaleUpDecimal64(), " ScaleUp ", children); + return getVectorExpression(newScaledExpr, mode); + } // Do Decimal64 conversion instead. return createDecimal64ToDecimalConversion(colIndex, returnType); } else { diff --git a/ql/src/test/queries/clientpositive/vector_case_when_3.q b/ql/src/test/queries/clientpositive/vector_case_when_3.q new file mode 100644 index 00000000000..35a157a9941 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_case_when_3.q @@ -0,0 +1,9 @@ +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.enabled=true; +create external table test_decimal(rattag string, newclt_all decimal(15,2)) stored as orc; +insert into test_decimal values('a', '10.20'); +explain vectorization detail select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal; +select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal; +explain vectorization detail select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal; +select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal; diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_3.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_3.q.out new file mode 100644 index 00000000000..ddfe1d85aa9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_case_when_3.q.out @@ -0,0 +1,288 @@ +PREHOOK: query: create external table test_decimal(rattag string, newclt_all decimal(15,2)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_decimal +POSTHOOK: query: create external table test_decimal(rattag string, newclt_all decimal(15,2)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_decimal +PREHOOK: query: insert into test_decimal values('a', '10.20') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_decimal +POSTHOOK: query: insert into test_decimal values('a', '10.20') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_decimal +POSTHOOK: Lineage: test_decimal.newclt_all SCRIPT [] +POSTHOOK: Lineage: test_decimal.rattag SCRIPT [] +PREHOOK: query: explain vectorization detail select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@test_decimal +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_decimal +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_decimal + Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rattag:string, 1:newclt_all:decimal(15,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 3:ROW__IS__DELETED:boolean] + Select Operator + expressions: if((rattag = 'a'), (newclt_all * 0.3), newclt_all) (type: decimal(17,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10] + selectExpressions: IfExprDecimal64ColumnDecimal64Column(col 7:boolean, col 8:decimal(17,3)/DECIMAL_64, col 9:decimal(17,3)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 0:string, val a) -> 7:boolean, Decimal64ColMultiplyDecimal64ScalarUnscaled(col 1:decimal(15,2)/DECIMAL_64, decimal64Val 3, decimalVal 0.3) -> 8:decimal(17,3)/DECIMAL_64, Decimal64ColScaleUp(col 1:decimal(15,2)/DECIMAL_64, decimal64Val 10, decimalVal 10) -> 9:decimal(17,3)/DECIMAL_64) -> [...] + Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal64ToDecimal(col 10:decimal(17,3)/DECIMAL_64) -> decimal(27,3) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:decimal(27,3) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(27,3)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: rattag:string, newclt_all:decimal(15,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(17,3)/DECIMAL_64, decimal(17,3), bigint, decimal(17,3)/DECIMAL_64, decimal(17,3)/DECIMAL_64, decimal(17,3)/DECIMAL_64] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(27,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(27,3)) -> decimal(27,3) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@test_decimal +#### A masked pattern was here #### +POSTHOOK: query: select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_decimal +#### A masked pattern was here #### +3.060 +PREHOOK: query: explain vectorization detail select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@test_decimal +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_decimal +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_decimal + Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rattag:string, 1:newclt_all:decimal(15,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 3:ROW__IS__DELETED:boolean] + Select Operator + expressions: if((rattag = 'Y'), (newclt_all * 0.3), newclt_all) (type: decimal(17,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10] + selectExpressions: IfExprDecimal64ColumnDecimal64Column(col 7:boolean, col 8:decimal(17,3)/DECIMAL_64, col 9:decimal(17,3)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 0:string, val Y) -> 7:boolean, Decimal64ColMultiplyDecimal64ScalarUnscaled(col 1:decimal(15,2)/DECIMAL_64, decimal64Val 3, decimalVal 0.3) -> 8:decimal(17,3)/DECIMAL_64, Decimal64ColScaleUp(col 1:decimal(15,2)/DECIMAL_64, decimal64Val 10, decimalVal 10) -> 9:decimal(17,3)/DECIMAL_64) -> [...] + Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal64ToDecimal(col 10:decimal(17,3)/DECIMAL_64) -> decimal(27,3) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:decimal(27,3) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(27,3)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: rattag:string, newclt_all:decimal(15,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(17,3)/DECIMAL_64, decimal(17,3), bigint, decimal(17,3)/DECIMAL_64, decimal(17,3)/DECIMAL_64, decimal(17,3)/DECIMAL_64] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(27,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(27,3)) -> decimal(27,3) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@test_decimal +#### A masked pattern was here #### +POSTHOOK: query: select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_decimal +#### A masked pattern was here #### +10.200