[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15275167#comment-15275167 ] Matt McCline commented on HIVE-12827: - Added to errata.txt for branch-1. > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 1.3.0, 2.0.0, 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15265020#comment-15265020 ] Lefty Leverenz commented on HIVE-12827: --- [~mmccline], the branch-1 commit doesn't include the JIRA number. (Déjà vu: see comment about previous commit.) Please add this to the errata.txt file that was created by HIVE-11704. Commit: 3734d5b674b4e8de9c0cc751650aee3194bfb93a. > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 1.3.0, 2.0.0, 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15264949#comment-15264949 ] Matt McCline commented on HIVE-12827: - Also committed to branch-1 > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 1.3.0, 2.0.0, 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15106070#comment-15106070 ] Gopal V commented on HIVE-12827: Yeah, it should be - I will cherry-pick this and amend the commit text when doing so. > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15106030#comment-15106030 ] Lefty Leverenz commented on HIVE-12827: --- [~gopalv], the commit doesn't include the JIRA number, although the summary text makes it easy enough to find. Please add this to the errata.txt file that was created by HIVE-11704. Commit: 9cab4414caf1bba2eb1852536a9d3676ba7eab21. > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15106058#comment-15106058 ] Gopal V commented on HIVE-12827: Thanks lefty, looks like I left out the JIRA id for all commits today. Pushed errata.txt > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15106088#comment-15106088 ] Lefty Leverenz commented on HIVE-12827: --- Great, thanks Gopal. > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 2.0.0, 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15106063#comment-15106063 ] Sergey Shelukhin commented on HIVE-12827: - Should this be in 2.0? looks like a bad bug > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Fix For: 2.1.0 > > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15103060#comment-15103060 ] Hive QA commented on HIVE-12827: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12782483/HIVE-12827.2.patch {color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 8 failed/errored test(s), 10004 tests executed *Failed tests:* {noformat} TestHWISessionManager - did not produce a TEST-*.xml file TestSparkCliDriver-timestamp_lazy.q-bucketsortoptimize_insert_4.q-date_udf.q-and-12-more - did not produce a TEST-*.xml file org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_tez_union org.apache.hadoop.hive.cli.TestNegativeCliDriver.testNegativeCliDriver_authorization_uri_import org.apache.hadoop.hive.metastore.txn.TestCompactionTxnHandler.testRevokeTimedOutWorkers org.apache.hadoop.hive.ql.exec.spark.session.TestSparkSessionManagerImpl.testMultiSessionMultipleUse org.apache.hadoop.hive.ql.exec.spark.session.TestSparkSessionManagerImpl.testSingleSessionMultipleUse org.apache.hive.jdbc.TestSSL.testSSLVersion {noformat} Test results: http://ec2-174-129-184-35.compute-1.amazonaws.com/jenkins/job/PreCommit-HIVE-TRUNK-Build/6644/testReport Console output: http://ec2-174-129-184-35.compute-1.amazonaws.com/jenkins/job/PreCommit-HIVE-TRUNK-Build/6644/console Test logs: http://ec2-174-129-184-35.compute-1.amazonaws.com/logs/PreCommit-HIVE-TRUNK-Build-6644/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 8 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12782483 - PreCommit-HIVE-TRUNK-Build > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { >
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15102304#comment-15102304 ] Sergey Shelukhin commented on HIVE-12827: - +1. Did the fill() on the main path get committed, and does it need to be removed with this patch? > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign > extends VectorColumnAssignVectorBase { > protected void assignDouble(double value, int destIndex) { > outCol.vector[destIndex] = value; > } > } > {code} > The pattern to imitate would be the earlier code from VectorBatchUtil > {code} > case DOUBLE: { > DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + > colIndex]; > if (writableCol != null) { > dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get(); > dcv.isNull[rowIndex] = false; > } else { > dcv.vector[rowIndex] = Double.NaN; > setNullColIsNullValue(dcv, rowIndex); > } > } > break; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (HIVE-12827) Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign needs explicit isNull[offset] modification
[ https://issues.apache.org/jira/browse/HIVE-12827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15102353#comment-15102353 ] Gopal V commented on HIVE-12827: The fill after every operation is unnecessary, the theory about the other patch was that some UDF in that query wasn't handling the hasNoNulls flag. That is not true, the original issue was that a scratch column feeding the COALESCE() is reused for the Join output columns & setting a column value via FloatReader does not set the isNull[batchIndex] = false (and then the Filter on cr_return_amount removes those rows). CBO rewrites the left outer join into an inner join, pushing the filter below the join & there's no more TS-FIL-MJ-FIL as the FIL migrates to the broadcast side. Here's my simplified example, which I used to narrow down the issue to FloatTreeReader. {code} set hive.cbo.enable=false; set hive.vectorized.execution.reducesink.new.enabled=false; set hive.vectorized.execution.mapjoin.native.enabled=true; set hive.vectorized.execution.reduce.enabled=false; set hive.vectorized.execution.reduce.groupby.enabled=false; use testing; create table if not exists cs stored as orc as select IF (cs_item_sk IN ( 1365 , 2243 , 2445 , 3259 , 3267 , 4027 , 5263 , 6003 , 8371 , 9593 , 10383, 10763, 11351, 12359, 12887, 13449, 16501, 16547 ), cs_item_sk, 0) as cs_item_sk , cs_order_number, cs_net_paid, cs_quantity from tpcds_bin_partitioned_orc_200.catalog_sales where true and cs_sold_date_sk = 2452245 and cs_net_profit > 1 and cs_net_paid > 0 and cs_quantity > 0 and cs_item_sk between 1365 and 16547 ; create table if not exists cr as select cr_return_amount, cr_item_sk, cr_order_number from tpcds_bin_partitioned_orc_200.catalog_returns where cr_returned_date_sk between 2452351 and 2452400 and cr_item_sk IN ( 1365 , 2243 , 2445 , 3259 , 3267 , 4027 , 5263 , 6003 , 8371 , 9593 , 10383, 10763, 11351, 12359, 12887, 13449, 16501, 16547 ) order by cr_item_sk ; select * from (select cs.cs_item_sk as item, coalesce(cr.cr_return_amount,0) as return_amount ,coalesce(cs.cs_net_paid,0) as net_paid -- (cast(sum(coalesce(cr.cr_return_amount,0)) as double)/ -- cast(sum(coalesce(cs.cs_net_paid,0)) as double)) as currency_ratio from cs -- catalog_sales cs left outer join cr -- catalog_returns cr on cs.cs_order_number = cr.cr_order_number and cs.cs_item_sk = cr.cr_item_sk where cr.cr_return_amount > 1 and cs.cs_quantity > 0 -- group by cs.cs_item_sk ) x; {code} > Vectorization: VectorCopyRow/VectorAssignRow/VectorDeserializeRow assign > needs explicit isNull[offset] modification > --- > > Key: HIVE-12827 > URL: https://issues.apache.org/jira/browse/HIVE-12827 > Project: Hive > Issue Type: Bug >Reporter: Gopal V >Assignee: Gopal V > Attachments: HIVE-12827.2.patch > > > Some scenarios do set Double.NaN instead of isNull=true, but all types aren't > consistent. > Examples of un-set isNull for the valid values are > {code} > private class FloatReader extends AbstractDoubleReader { > FloatReader(int columnIndex) { > super(columnIndex); > } > @Override > void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { > DoubleColumnVector colVector = (DoubleColumnVector) > batch.cols[columnIndex]; > if (deserializeRead.readCheckNull()) { > VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); > } else { > float value = deserializeRead.readFloat(); > colVector.vector[batchIndex] = (double) value; > } > } > } > {code} > {code} > private class DoubleCopyRow extends CopyRow { > DoubleCopyRow(int inColumnIndex, int outColumnIndex) { > super(inColumnIndex, outColumnIndex); > } > @Override > void copy(VectorizedRowBatch inBatch, int inBatchIndex, > VectorizedRowBatch outBatch, int outBatchIndex) { > DoubleColumnVector inColVector = (DoubleColumnVector) > inBatch.cols[inColumnIndex]; > DoubleColumnVector outColVector = (DoubleColumnVector) > outBatch.cols[outColumnIndex]; > if (inColVector.isRepeating) { > if (inColVector.noNulls || !inColVector.isNull[0]) { > outColVector.vector[outBatchIndex] = inColVector.vector[0]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } else { > if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { > outColVector.vector[outBatchIndex] = > inColVector.vector[inBatchIndex]; > } else { > VectorizedBatchUtil.setNullColIsNullValue(outColVector, > outBatchIndex); > } > } > } > } > {code} > {code} > private static abstract class VectorDoubleColumnAssign >