Repository: hive
Updated Branches:
  refs/heads/branch-3.1 7e46905c6 -> 7e649028c
HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7e649028
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7e649028
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7e649028

Branch: refs/heads/branch-3.1
Commit: 7e649028c343c3e5491d5fc5b3d271cb897f3f21
Parents: 7e46905
Author: Teddy Choi <pudi...@gmail.com>
Authored: Thu Jul 12 06:13:40 2018 +0900
Committer: Teddy Choi <pudi...@gmail.com>
Committed: Thu Jul 12 06:26:00 2018 +0900

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java | 6 ++++++
 .../apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java | 4 ++++
 2 files changed, 10 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/7e649028/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index d177e3f..889bd58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -147,6 +147,12 @@ public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
   public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException {
     // If the user hasn't been reading by row, use the fast path.
     if (rowInBatch >= batch.size) {
+      if (batch.size > 0) {
+        // the local batch has been consumed entirely, reset it
+        batch.reset();
+      }
+      baseRow = super.getRowNumber();
+      rowInBatch = 0;
       return super.nextBatch(theirBatch);
     }
     copyIntoBatch(theirBatch, batch, rowInBatch);

http://git-wip-us.apache.org/repos/asf/hive/blob/7e649028/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 2071d13..aa99e57 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -154,9 +154,13 @@ public class TestVectorizedORCReader {
     VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
     OrcStruct row = null;
+    long lastRowNumber = -1;
 
     // Check Vectorized ORC reader against ORC row reader
     while (vrr.nextBatch(batch)) {
+      Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
       for (int i = 0; i < batch.size; i++) {
+        Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber()+i);
+        lastRowNumber = rr.getRowNumber();
         row = (OrcStruct) rr.next(row);
         for (int j = 0; j < batch.cols.length; j++) {
           Object a = (row.getFieldValue(j));