This is an automated email from the ASF dual-hosted git repository. ravindra pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new ae03c54 ARROW-5903: [Java] Optimise set methods in decimal vector ae03c54 is described below commit ae03c544beae36637e4418159ad951ac6440fac1 Author: Pindikura Ravindra <ravin...@dremio.com> AuthorDate: Thu Jul 11 12:30:27 2019 +0530 ARROW-5903: [Java] Optimise set methods in decimal vector - reduce the number of bound checks - simplify loop Author: Pindikura Ravindra <ravin...@dremio.com> Closes #4847 from pravindra/arrow-5903-new and squashes the following commits: 8376b8b69 <Pindikura Ravindra> ARROW-5903: fix style check error 5a4daec97 <Pindikura Ravindra> ARROW-5903: Optimise set methods in decimal vector --- .../arrow/vector/DecimalVectorBenchmarks.java | 124 +++++++++++++++++++++ .../org/apache/arrow/vector/DecimalVector.java | 85 +++++++------- 2 files changed, 165 insertions(+), 44 deletions(-) diff --git a/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java new file mode 100644 index 0000000..aaa8deb --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import java.math.BigDecimal; +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Test; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import io.netty.buffer.ArrowBuf; + +/** + * Benchmarks for {@link DecimalVector}. + */ +@State(Scope.Benchmark) +public class DecimalVectorBenchmarks { + + private static final int VECTOR_LENGTH = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private DecimalVector vector; + + private ArrowBuf fromBuf; + + byte[] fromByteArray; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + vector = new DecimalVector("vector", allocator, 38, 16); + vector.allocateNew(VECTOR_LENGTH); + + fromBuf = allocator.buffer(VECTOR_LENGTH * DecimalVector.TYPE_WIDTH); + for (int i = 0; i < VECTOR_LENGTH; i++) { + byte[] bytes = BigDecimal.valueOf(i).unscaledValue().toByteArray(); + fromBuf.setBytes(i * DecimalVector.TYPE_WIDTH, bytes); + } + + fromByteArray = new byte[DecimalVector.TYPE_WIDTH]; + fromBuf.getBytes(0, fromByteArray); + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + fromBuf.close(); + vector.close(); + allocator.close(); + } + + /** + * Test writing on {@link DecimalVector} from arrow buf. + */ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setBigEndianArrowBufBenchmark() { + int offset = 0; + + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setBigEndianSafe(i, offset, fromBuf, DecimalVector.TYPE_WIDTH); + offset += 8; + } + } + + /** + * Test writing on {@link DecimalVector} from byte array. + */ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setBigEndianByteArrayBenchmark() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setBigEndian(i, fromByteArray); + } + } + + @Test + public void evaluate() throws RunnerException { + Options opt = new OptionsBuilder() + .include(DecimalVectorBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java index 1db83a1..cf77186 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java @@ -34,6 +34,7 @@ import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.TransferPair; import io.netty.buffer.ArrowBuf; +import io.netty.util.internal.PlatformDependent; /** * DecimalVector implements a fixed width vector (16 bytes) of @@ -206,41 +207,30 @@ public class DecimalVector extends BaseFixedWidthVector { public void setBigEndian(int index, byte[] value) { BitVectorHelper.setValidityBitToOne(validityBuffer, index); final int length = value.length; - int startIndex = index * TYPE_WIDTH; - if (length == TYPE_WIDTH) { - for (int i = TYPE_WIDTH - 1; i >= 3; i -= 4) { - valueBuffer.setByte(startIndex, value[i]); - valueBuffer.setByte(startIndex + 1, value[i - 1]); - valueBuffer.setByte(startIndex + 2, value[i - 2]); - valueBuffer.setByte(startIndex + 3, value[i - 3]); - startIndex += 4; - } - return; + // do the bound check. + valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH); + + long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH; + // swap bytes to convert BE to LE + for (int byteIdx = 0; byteIdx < length; ++byteIdx) { + PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]); } - if (length == 0) { - valueBuffer.setZero(startIndex, TYPE_WIDTH); + if (length == TYPE_WIDTH) { return; } - if (length < 16) { - for (int i = length - 1; i >= 0; i--) { - valueBuffer.setByte(startIndex, value[i]); - startIndex++; - } - + if (length == 0) { + PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte)0); + } else if (length < TYPE_WIDTH) { + // sign extend final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - final int maxStartIndex = (index + 1) * TYPE_WIDTH; - while (startIndex < maxStartIndex) { - valueBuffer.setByte(startIndex, pad); - startIndex++; - } - - return; + PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); + } else { + throw new IllegalArgumentException( + "Invalid decimal value length. Valid length in [1 - 16], got " + length); } - - throw new IllegalArgumentException("Invalid decimal value length. Valid length in [1 - 16], got " + length); } /** @@ -265,17 +255,19 @@ public class DecimalVector extends BaseFixedWidthVector { public void setSafe(int index, int start, ArrowBuf buffer, int length) { handleSafe(index); BitVectorHelper.setValidityBitToOne(validityBuffer, index); - int startIndexInVector = index * TYPE_WIDTH; - valueBuffer.setBytes(startIndexInVector, buffer, start, length); + + // do the bound checks. + buffer.checkBytes(start, start + length); + valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH); + + long inAddress = buffer.memoryAddress() + start; + long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH; + PlatformDependent.copyMemory(inAddress, outAddress, length); // sign extend if (length < 16) { - byte msb = buffer.getByte(start + length - 1); + byte msb = PlatformDependent.getByte(inAddress + length - 1); final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - int startIndex = startIndexInVector + length; - int endIndex = startIndexInVector + TYPE_WIDTH; - for (int i = startIndex; i < endIndex; i++) { - valueBuffer.setByte(i, pad); - } + PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); } } @@ -290,19 +282,24 @@ public class DecimalVector extends BaseFixedWidthVector { public void setBigEndianSafe(int index, int start, ArrowBuf buffer, int length) { handleSafe(index); BitVectorHelper.setValidityBitToOne(validityBuffer, index); - int startIndexInVector = index * TYPE_WIDTH; - for (int i = start + length - 1; i >= start; i--) { - valueBuffer.setByte(startIndexInVector, buffer.getByte(i)); - startIndexInVector++; + + // do the bound checks. + buffer.checkBytes(start, start + length); + valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH); + + // not using buffer.getByte() to avoid boundary checks for every byte. + long inAddress = buffer.memoryAddress() + start; + long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH; + // swap bytes to convert BE to LE + for (int byteIdx = 0; byteIdx < length; ++byteIdx) { + byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx); + PlatformDependent.putByte(outAddress + byteIdx, val); } // sign extend if (length < 16) { - byte msb = buffer.getByte(start); + byte msb = PlatformDependent.getByte(inAddress); final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - int endIndex = startIndexInVector + TYPE_WIDTH - length; - for (int i = startIndexInVector; i < endIndex; i++) { - valueBuffer.setByte(i, pad); - } + PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); } }