Repository: arrow Updated Branches: refs/heads/master 3b14765e8 -> 1dd0f5f58
ARROW-1267: [Java] Handle zero length case in BitVector.splitAndTransfer @jacques-n @StevenMPhillips PR for the change: https://github.com/dremio/arrow/commit/b794dfa5fe209cf8e3c17cb828964a0a0863c1f8 A new unit test has been added on top of original change. Author: Steven Phillips <[email protected]> Author: siddharth <[email protected]> Closes #890 from siddharthteotia/ARROW-1267-PR and squashes the following commits: 89a08d9 [siddharth] Handle zero length in BitVector.splitAndTransfer 84946b7 [Steven Phillips] Handle zero length case in BitVector.splitAndTransfer() b55c146 [Steven Phillips] Handle zero length case in BitVector.splitAndTransfer() Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/1dd0f5f5 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/1dd0f5f5 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/1dd0f5f5 Branch: refs/heads/master Commit: 1dd0f5f580bd751d6e10879775d9441358c4fe66 Parents: 3b14765 Author: Steven Phillips <[email protected]> Authored: Fri Jul 28 17:09:54 2017 -0700 Committer: Steven Phillips <[email protected]> Committed: Fri Jul 28 17:09:54 2017 -0700 ---------------------------------------------------------------------- .../java/org/apache/arrow/vector/BitVector.java | 52 +++++++------ .../org/apache/arrow/vector/TestBitVector.java | 80 +++++++++++++++++++- 2 files changed, 106 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/1dd0f5f5/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index 82cbd47..f34ef2c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -261,32 +261,34 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe int firstByte = getByteIndex(startIndex); int byteSize = getSizeFromCount(length); int offset = startIndex % 8; - if (offset == 0) { - target.clear(); - // slice - if (target.data != null) { - target.data.release(); - } - target.data = data.slice(firstByte, byteSize); - target.data.retain(1); - } else { - // Copy data - // When the first bit starts from the middle of a byte (offset != 0), copy data from src BitVector. - // Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th byte. - // The last byte copied to target is a bit tricky : - // 1) if length requires partly byte (length % 8 !=0), copy the remaining bits only. - // 2) otherwise, copy the last byte in the same way as to the prior bytes. - target.clear(); - target.allocateNew(length); - // TODO maybe do this one word at a time, rather than byte? - for(int i = 0; i < byteSize - 1; i++) { - target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>> offset) + (this.data.getByte(firstByte + i + 1) << (8 - offset)))); - } - if (length % 8 != 0) { - target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset)); + if (length > 0) { + if (offset == 0) { + target.clear(); + // slice + if (target.data != null) { + target.data.release(); + } + target.data = data.slice(firstByte, byteSize); + target.data.retain(1); } else { - target.data.setByte(byteSize - 1, - (((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset) + (this.data.getByte(firstByte + byteSize) << (8 - offset)))); + // Copy data + // When the first bit starts from the middle of a byte (offset != 0), copy data from src BitVector. + // Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th byte. + // The last byte copied to target is a bit tricky : + // 1) if length requires partly byte (length % 8 !=0), copy the remaining bits only. + // 2) otherwise, copy the last byte in the same way as to the prior bytes. + target.clear(); + target.allocateNew(length); + // TODO maybe do this one word at a time, rather than byte? + for (int i = 0; i < byteSize - 1; i++) { + target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>> offset) + (this.data.getByte(firstByte + i + 1) << (8 - offset)))); + } + if (length % 8 != 0) { + target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset)); + } else { + target.data.setByte(byteSize - 1, + (((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset) + (this.data.getByte(firstByte + byteSize) << (8 - offset)))); + } } } target.getMutator().setValueCount(length); http://git-wip-us.apache.org/repos/asf/arrow/blob/1dd0f5f5/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java index f2343c8..194b785 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java @@ -20,6 +20,8 @@ package org.apache.arrow.vector; import static org.junit.Assert.assertEquals; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -31,7 +33,7 @@ public class TestBitVector { @Before public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + allocator = new RootAllocator(Long.MAX_VALUE); } @After @@ -63,4 +65,80 @@ public class TestBitVector { } } + @Test + public void testSplitAndTransfer() throws Exception { + + try (final BitVector sourceVector = new BitVector("bitvector", allocator)) { + final BitVector.Mutator sourceMutator = sourceVector.getMutator(); + final BitVector.Accessor sourceAccessor = sourceVector.getAccessor(); + + sourceVector.allocateNew(40); + + /* populate the bitvector -- 010101010101010101010101..... */ + for(int i = 0; i < 40; i++) { + if((i & 1) == 1) { + sourceMutator.set(i, 1); + } + else { + sourceMutator.set(i, 0); + } + } + + sourceMutator.setValueCount(40); + + /* check the vector output */ + for(int i = 0; i < 40; i++) { + int result = sourceAccessor.get(i); + if((i & 1) == 1) { + assertEquals(Integer.toString(1), Integer.toString(result)); + } + else { + assertEquals(Integer.toString(0), Integer.toString(result)); + } + } + + final TransferPair transferPair = sourceVector.getTransferPair(allocator); + final BitVector toVector = (BitVector)transferPair.getTo(); + final BitVector.Accessor toAccessor = toVector.getAccessor(); + final BitVector.Mutator toMutator = toVector.getMutator(); + + /* + * form test cases such that we cover: + * + * (1) the start index is exactly where a particular byte starts in the source bit vector + * (2) the start index is randomly positioned within a byte in the source bit vector + * (2.1) the length is a multiple of 8 + * (2.2) the length is not a multiple of 8 + */ + final int[][] transferLengths = { {0, 8}, /* (1) */ + {8, 10}, /* (1) */ + {18, 0}, /* zero length scenario */ + {18, 8}, /* (2.1) */ + {26, 0}, /* zero length scenario */ + {26, 14} /* (2.2) */ + }; + + for (final int[] transferLength : transferLengths) { + final int start = transferLength[0]; + final int length = transferLength[1]; + + transferPair.splitAndTransfer(start, length); + + /* check the toVector output after doing splitAndTransfer */ + for (int i = 0; i < length; i++) { + int result = toAccessor.get(i); + if((i & 1) == 1) { + assertEquals(Integer.toString(1), Integer.toString(result)); + } + else { + assertEquals(Integer.toString(0), Integer.toString(result)); + } + } + + toVector.clear(); + } + + sourceVector.close(); + } + } }
