[ https://issues.apache.org/jira/browse/HIVE-25193?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
qiang.bi updated HIVE-25193: ---------------------------- Description: Problem statement: {code:java} set hive.vectorized.execution.enabled = true; select nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) corrected_price from dw_mdm_sync_asset; {code} The error log: {code:java} Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to org.apache.hadoop.hive.ql.exec.vector.BytesColumnVectorCaused by: java.lang.ClassCastException: org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:504) at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce.evaluate(VectorCoalesce.java:124) at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.evaluateChildren(VectorExpression.java:271) at org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDouble.evaluate(CastStringToDouble.java:83) at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146) ... 28 more{code} The problem HiveQL: {code:java} nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) corrected_price {code} The problem expression: {code:java} CastStringToDouble(col 39:string)(children: VectorCoalesce(columns [37, 38])(children: VectorUDFAdaptor(get_json_object(_col14, '$.correctedPrice')) -> 37:string, ConstantVectorExpression(val 0.88) -> 38:decimal(2,2)) -> 39:string) -> 40:double {code} The problem code: {code:java} public class VectorCoalesce extends VectorExpression { ... 
@Override public void evaluate(VectorizedRowBatch batch) throws HiveException { if (childExpressions != null) { super.evaluateChildren(batch); } int[] sel = batch.selected; int n = batch.size; ColumnVector outputColVector = batch.cols[outputColumnNum]; boolean[] outputIsNull = outputColVector.isNull; if (n <= 0) { // Nothing to do return; } if (unassignedBatchIndices == null || n > unassignedBatchIndices.length) { // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE). final int roundUpSize = ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE) * VectorizedRowBatch.DEFAULT_SIZE; unassignedBatchIndices = new int[roundUpSize]; } // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; // CONSIDER: Should be do this for all vector expressions that can // work on BytesColumnVector output columns??? outputColVector.init(); final int columnCount = inputColumns.length; /* * Process the input columns to find a non-NULL value for each row. * * We track the unassigned batchIndex of the rows that have not received * a non-NULL value yet. Similar to a selected array. */ boolean isAllUnassigned = true; int unassignedColumnCount = 0; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; if (cv.isRepeating) { if (cv.noNulls || !cv.isNull[0]) { /* * With a repeating value we can finish all remaining rows. */ if (isAllUnassigned) { // No other columns provided non-NULL values. We can return repeated output. outputIsNull[0] = false; outputColVector.setElement(0, 0, cv); outputColVector.isRepeating = true; return; } else { // Some rows have already been assigned values. Assign the remaining. // We cannot use copySelected method here. for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; outputIsNull[batchIndex] = false; // Our input is repeating (i.e. inputColNumber = 0). 
outputColVector.setElement(batchIndex, 0, cv); } return; } } else { // Repeated NULLs -- skip this input column. } } else { /* * Non-repeating input column. Use any non-NULL values for unassigned rows. */ if (isAllUnassigned) { /* * No other columns provided non-NULL values. We *may* be able to finish all rows * with this input column... */ if (cv.noNulls){ // Since no NULLs, we can provide values for all rows. if (batch.selectedInUse) { for (int i = 0; i < n; i++) { final int batchIndex = sel[i]; outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } } else { Arrays.fill(outputIsNull, 0, n, false); for (int batchIndex = 0; batchIndex < n; batchIndex++) { outputColVector.setElement(batchIndex, batchIndex, cv); } } return; } else { // We might not be able to assign all rows because of input NULLs. Start tracking any // unassigned rows. boolean[] inputIsNull = cv.isNull; if (batch.selectedInUse) { for (int i = 0; i < n; i++) { final int batchIndex = sel[i]; if (!inputIsNull[batchIndex]) { outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } else { unassignedBatchIndices[unassignedColumnCount++] = batchIndex; } } } else { for (int batchIndex = 0; batchIndex < n; batchIndex++) { if (!inputIsNull[batchIndex]) { outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } else { unassignedBatchIndices[unassignedColumnCount++] = batchIndex; } } } if (unassignedColumnCount == 0) { return; } isAllUnassigned = false; } } else { /* * We previously assigned *some* rows with non-NULL values. The batch indices of * the unassigned row were tracked. */ if (cv.noNulls) { // Assign all remaining rows. for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } return; } else { // Use any non-NULL values found; remember the remaining unassigned. 
boolean[] inputIsNull = cv.isNull; int newUnassignedColumnCount = 0; for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; if (!inputIsNull[batchIndex]) { outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } else { unassignedBatchIndices[newUnassignedColumnCount++] = batchIndex; } } if (newUnassignedColumnCount == 0) { return; } unassignedColumnCount = newUnassignedColumnCount; } } } } // NULL out the remaining columns. outputColVector.noNulls = false; if (isAllUnassigned) { outputIsNull[0] = true; outputColVector.isRepeating = true; } else { for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; outputIsNull[batchIndex] = true; } } } ... } {code} In the above code, outputColVector is a BytesColumnVector, but one of the input column vectors is a DecimalColumnVector. As a workaround, we can put single quotes around 0.88 so that the default value is a string. For example: {code:java} nvl(get_json_object(attr_json,'$.correctedPrice'), '0.88') corrected_price {code} was: Problem statement: {code:java} set hive.vectorized.execution.enabled = true; select nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) corrected_price, from dw_mdm_sync_asset; {code} The error log: {code:java} Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to org.apache.hadoop.hive.ql.exec.vector.BytesColumnVectorCaused by: java.lang.ClassCastException: org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:504) at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce.evaluate(VectorCoalesce.java:124) at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.evaluateChildren(VectorExpression.java:271) at 
org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDouble.evaluate(CastStringToDouble.java:83) at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146) ... 28 more{code} The problem HiveQL: {code:java} nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) corrected_price {code} The problem expression: {code:java} CastStringToDouble(col 39:string)(children: VectorCoalesce(columns [37, 38])(children: VectorUDFAdaptor(get_json_object(_col14, '$.correctedPrice')) -> 37:string, ConstantVectorExpression(val 0.88) -> 38:decimal(2,2)) -> 39:string) -> 40:double {code} The problem code: {code:java} public class VectorCoalesce extends VectorExpression { ... @Override public void evaluate(VectorizedRowBatch batch) throws HiveException { if (childExpressions != null) { super.evaluateChildren(batch); } int[] sel = batch.selected; int n = batch.size; ColumnVector outputColVector = batch.cols[outputColumnNum]; boolean[] outputIsNull = outputColVector.isNull; if (n <= 0) { // Nothing to do return; } if (unassignedBatchIndices == null || n > unassignedBatchIndices.length) { // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE). final int roundUpSize = ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE) * VectorizedRowBatch.DEFAULT_SIZE; unassignedBatchIndices = new int[roundUpSize]; } // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; // CONSIDER: Should be do this for all vector expressions that can // work on BytesColumnVector output columns??? outputColVector.init(); final int columnCount = inputColumns.length; /* * Process the input columns to find a non-NULL value for each row. * * We track the unassigned batchIndex of the rows that have not received * a non-NULL value yet. Similar to a selected array. 
*/ boolean isAllUnassigned = true; int unassignedColumnCount = 0; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; if (cv.isRepeating) { if (cv.noNulls || !cv.isNull[0]) { /* * With a repeating value we can finish all remaining rows. */ if (isAllUnassigned) { // No other columns provided non-NULL values. We can return repeated output. outputIsNull[0] = false; outputColVector.setElement(0, 0, cv); outputColVector.isRepeating = true; return; } else { // Some rows have already been assigned values. Assign the remaining. // We cannot use copySelected method here. for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; outputIsNull[batchIndex] = false; // Our input is repeating (i.e. inputColNumber = 0). outputColVector.setElement(batchIndex, 0, cv); } return; } } else { // Repeated NULLs -- skip this input column. } } else { /* * Non-repeating input column. Use any non-NULL values for unassigned rows. */ if (isAllUnassigned) { /* * No other columns provided non-NULL values. We *may* be able to finish all rows * with this input column... */ if (cv.noNulls){ // Since no NULLs, we can provide values for all rows. if (batch.selectedInUse) { for (int i = 0; i < n; i++) { final int batchIndex = sel[i]; outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } } else { Arrays.fill(outputIsNull, 0, n, false); for (int batchIndex = 0; batchIndex < n; batchIndex++) { outputColVector.setElement(batchIndex, batchIndex, cv); } } return; } else { // We might not be able to assign all rows because of input NULLs. Start tracking any // unassigned rows. 
boolean[] inputIsNull = cv.isNull; if (batch.selectedInUse) { for (int i = 0; i < n; i++) { final int batchIndex = sel[i]; if (!inputIsNull[batchIndex]) { outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } else { unassignedBatchIndices[unassignedColumnCount++] = batchIndex; } } } else { for (int batchIndex = 0; batchIndex < n; batchIndex++) { if (!inputIsNull[batchIndex]) { outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } else { unassignedBatchIndices[unassignedColumnCount++] = batchIndex; } } } if (unassignedColumnCount == 0) { return; } isAllUnassigned = false; } } else { /* * We previously assigned *some* rows with non-NULL values. The batch indices of * the unassigned row were tracked. */ if (cv.noNulls) { // Assign all remaining rows. for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } return; } else { // Use any non-NULL values found; remember the remaining unassigned. boolean[] inputIsNull = cv.isNull; int newUnassignedColumnCount = 0; for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; if (!inputIsNull[batchIndex]) { outputIsNull[batchIndex] = false; outputColVector.setElement(batchIndex, batchIndex, cv); } else { unassignedBatchIndices[newUnassignedColumnCount++] = batchIndex; } } if (newUnassignedColumnCount == 0) { return; } unassignedColumnCount = newUnassignedColumnCount; } } } } // NULL out the remaining columns. outputColVector.noNulls = false; if (isAllUnassigned) { outputIsNull[0] = true; outputColVector.isRepeating = true; } else { for (int i = 0; i < unassignedColumnCount; i++) { final int batchIndex = unassignedBatchIndices[i]; outputIsNull[batchIndex] = true; } } } ... 
} {code} The above code, outputColVector is BytesColumnVector type, but one of the columnVector is DecimalColumnVector type. At present, we can add single quotes in “0.88” to resolve this problem.For example: {code:java} nvl(get_json_object(attr_json,'$.correctedPrice'), '0.88') corrected_price {code} > Vectorized Query Execution: ClassCastException when use nvl() function which > default_value is decimal type > ---------------------------------------------------------------------------------------------------------- > > Key: HIVE-25193 > URL: https://issues.apache.org/jira/browse/HIVE-25193 > Project: Hive > Issue Type: Bug > Components: Vectorization > Affects Versions: 4.0.0 > Reporter: qiang.bi > Assignee: qiang.bi > Priority: Major > Labels: pull-request-available > Attachments: HIVE-25193.1.patch > > Time Spent: 10m > Remaining Estimate: 0h > > Problem statement: > {code:java} > set hive.vectorized.execution.enabled = true; > select nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) > corrected_price from dw_mdm_sync_asset; > {code} > The error log: > {code:java} > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVectorCaused by: > java.lang.ClassCastException: > org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector cannot be cast to > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector at > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:504) > at > org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce.evaluate(VectorCoalesce.java:124) > at > org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression.evaluateChildren(VectorExpression.java:271) > at > org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDouble.evaluate(CastStringToDouble.java:83) > at > org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146) > ... 
28 more{code} > The problem HiveQL: > {code:java} > nvl(get_json_object(attr_json,'$.correctedPrice'),0.88) corrected_price > {code} > The problem expression: > {code:java} > CastStringToDouble(col 39:string)(children: VectorCoalesce(columns [37, > 38])(children: VectorUDFAdaptor(get_json_object(_col14, '$.correctedPrice')) > -> 37:string, ConstantVectorExpression(val 0.88) -> 38:decimal(2,2)) -> > 39:string) -> 40:double > {code} > The problem code: > {code:java} > public class VectorCoalesce extends VectorExpression { > ... > @Override > public void evaluate(VectorizedRowBatch batch) throws HiveException { if > (childExpressions != null) { > super.evaluateChildren(batch); > } int[] sel = batch.selected; > int n = batch.size; > ColumnVector outputColVector = batch.cols[outputColumnNum]; > boolean[] outputIsNull = outputColVector.isNull; > if (n <= 0) { > // Nothing to do > return; > } if (unassignedBatchIndices == null || n > > unassignedBatchIndices.length) { // (Re)allocate larger to be a multiple > of 1024 (DEFAULT_SIZE). > final int roundUpSize = > ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / > VectorizedRowBatch.DEFAULT_SIZE) > * VectorizedRowBatch.DEFAULT_SIZE; > unassignedBatchIndices = new int[roundUpSize]; > } // We do not need to do a column reset since we are carefully > changing the output. > outputColVector.isRepeating = false; // CONSIDER: Should be do this > for all vector expressions that can > // work on BytesColumnVector output columns??? > outputColVector.init(); > final int columnCount = inputColumns.length; /* > * Process the input columns to find a non-NULL value for each row. > * > * We track the unassigned batchIndex of the rows that have not received > * a non-NULL value yet. Similar to a selected array. 
> */ > boolean isAllUnassigned = true; > int unassignedColumnCount = 0; > for (int k = 0; k < inputColumns.length; k++) { > ColumnVector cv = batch.cols[inputColumns[k]]; > if (cv.isRepeating) { if (cv.noNulls || !cv.isNull[0]) { > /* > * With a repeating value we can finish all remaining rows. > */ > if (isAllUnassigned) { // No other columns provided > non-NULL values. We can return repeated output. > outputIsNull[0] = false; > outputColVector.setElement(0, 0, cv); > outputColVector.isRepeating = true; > return; > } else { // Some rows have already been assigned values. > Assign the remaining. > // We cannot use copySelected method here. > for (int i = 0; i < unassignedColumnCount; i++) { > final int batchIndex = unassignedBatchIndices[i]; > outputIsNull[batchIndex] = false; // Our input is > repeating (i.e. inputColNumber = 0). > outputColVector.setElement(batchIndex, 0, cv); > } > return; > } > } else { // Repeated NULLs -- skip this input column. > } > } else { /* > * Non-repeating input column. Use any non-NULL values for unassigned > rows. > */ > if (isAllUnassigned) { /* > * No other columns provided non-NULL values. We *may* be able to > finish all rows > * with this input column... > */ > if (cv.noNulls){ // Since no NULLs, we can provide > values for all rows. > if (batch.selectedInUse) { > for (int i = 0; i < n; i++) { > final int batchIndex = sel[i]; > outputIsNull[batchIndex] = false; > outputColVector.setElement(batchIndex, batchIndex, cv); > } > } else { > Arrays.fill(outputIsNull, 0, n, false); > for (int batchIndex = 0; batchIndex < n; batchIndex++) { > outputColVector.setElement(batchIndex, batchIndex, cv); > } > } > return; > } else { // We might not be able to assign all rows > because of input NULLs. Start tracking any > // unassigned rows. 
> boolean[] inputIsNull = cv.isNull; > if (batch.selectedInUse) { > for (int i = 0; i < n; i++) { > final int batchIndex = sel[i]; > if (!inputIsNull[batchIndex]) { > outputIsNull[batchIndex] = false; > outputColVector.setElement(batchIndex, batchIndex, cv); > } else { > unassignedBatchIndices[unassignedColumnCount++] = > batchIndex; > } > } > } else { > for (int batchIndex = 0; batchIndex < n; batchIndex++) { > if (!inputIsNull[batchIndex]) { > outputIsNull[batchIndex] = false; > outputColVector.setElement(batchIndex, batchIndex, cv); > } else { > unassignedBatchIndices[unassignedColumnCount++] = > batchIndex; > } > } > } > if (unassignedColumnCount == 0) { > return; > } > isAllUnassigned = false; > } > } else { /* > * We previously assigned *some* rows with non-NULL values. The > batch indices of > * the unassigned row were tracked. > */ > if (cv.noNulls) { // Assign all remaining rows. > for (int i = 0; i < unassignedColumnCount; i++) { > final int batchIndex = unassignedBatchIndices[i]; > outputIsNull[batchIndex] = false; > outputColVector.setElement(batchIndex, batchIndex, cv); > } > return; > } else { // Use any non-NULL values found; remember the > remaining unassigned. > boolean[] inputIsNull = cv.isNull; > int newUnassignedColumnCount = 0; > for (int i = 0; i < unassignedColumnCount; i++) { > final int batchIndex = unassignedBatchIndices[i]; > if (!inputIsNull[batchIndex]) { > outputIsNull[batchIndex] = false; > outputColVector.setElement(batchIndex, batchIndex, cv); > } else { > unassignedBatchIndices[newUnassignedColumnCount++] = > batchIndex; > } > } > if (newUnassignedColumnCount == 0) { > return; > } > unassignedColumnCount = newUnassignedColumnCount; > } > } > } > } // NULL out the remaining columns. 
> outputColVector.noNulls = false; > if (isAllUnassigned) { > outputIsNull[0] = true; > outputColVector.isRepeating = true; > } else { > for (int i = 0; i < unassignedColumnCount; i++) { > final int batchIndex = unassignedBatchIndices[i]; > outputIsNull[batchIndex] = true; > } > } > } > ... > } > {code} > In the above code, outputColVector is a BytesColumnVector, but one of the > input column vectors is a DecimalColumnVector. > As a workaround, we can put single quotes around 0.88 so that the default > value is a string. For example: > {code:java} > nvl(get_json_object(attr_json,'$.correctedPrice'), '0.88') corrected_price > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)