[ https://issues.apache.org/jira/browse/ARROW-1717?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16250679#comment-16250679 ]
ASF GitHub Bot commented on ARROW-1717: --------------------------------------- wesm closed pull request #1290: ARROW-1717: Refactor JsonReader URL: https://github.com/apache/arrow/pull/1290 This is a PR merged from a forked repository. Because GitHub hides the original diff of a fork-based pull request once it is merged, the diff is reproduced below for the sake of provenance: diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 6587cde63..47daeee7f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -909,52 +909,6 @@ protected final void handleSafe(int index, int dataLength) { } } - - /****************************************************************** - * * - * helper methods currently * - * used by JsonFileReader and * - * JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Method used by Json Reader to explicitly set the data of the variable - * width vector elements. The method takes care of allocating the memory - * for the vector if caller hasn't done so. - * - * This method should not be used externally. - * - * @param data ArrowBuf for storing variable width elements in the vector - * @param offset offset of the element - * @param allocator memory allocator - * @param index position of the element in the vector - * @param value array of bytes for the element - * @param valueCount number of elements in the vector - * @return buffer holding the variable width data. 
- */ - public static ArrowBuf set(ArrowBuf data, ArrowBuf offset, - BufferAllocator allocator, int index, byte[] value, - int valueCount) { - if (data == null) { - data = allocator.buffer(INITIAL_BYTE_COUNT); - } - final int currentBufferCapacity = data.capacity(); - final int currentStartOffset = offset.getInt(index * OFFSET_WIDTH); - while (currentBufferCapacity < currentStartOffset + value.length) { - final ArrowBuf newBuf = allocator.buffer(currentBufferCapacity * 2); - newBuf.setBytes(0, data, 0, currentBufferCapacity); - data.release(); - data = newBuf; - } - data.setBytes(currentStartOffset, value, 0, value.length); - if (index == (valueCount - 1)) { - data.writerIndex(offset.getInt(valueCount * OFFSET_WIDTH)); - } - return data; - } - /** * Method used by Json Writer to read a variable width element from * the variable width vector and write to Json. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java index ee40d708c..253427333 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableBigIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 8; + public static final byte TYPE_WIDTH = 8; private final FieldReader reader; /** @@ -290,41 +290,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. 
- * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java index 949287eca..e6b5b590f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java @@ -292,40 +292,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java index a0bdccede..8e15100f7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java @@ -296,40 +296,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java index 8320f9083..2a611c6a2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java @@ -37,7 +37,7 @@ * maintained to track which elements in the vector are null. */ public class NullableDecimalVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 16; + public static final byte TYPE_WIDTH = 16; private final FieldReader reader; private final int precision; @@ -357,41 +357,6 @@ public void setSafe(int index, int isSet, int start, ArrowBuf buffer) { /****************************************************************** * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value as array of bytes - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, byte[] value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - DecimalUtility.writeByteArrayToArrowBuf(value, buffer, index); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - - - /****************************************************************** - * * * vector transfer * * * ******************************************************************/ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java index 5b28065c4..3ba5cfcea 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableFloat4Vector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 4; + public static final byte TYPE_WIDTH = 4; private final FieldReader reader; /** @@ -291,41 +291,6 @@ public void setSafe(int index, int isSet, float value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, float value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setFloat(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java index 624abf2f2..2fb96a44f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableFloat8Vector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 8; + public static final byte TYPE_WIDTH = 8; private final FieldReader reader; /** @@ -291,41 +291,6 @@ public void setSafe(int index, int isSet, double value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, double value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setDouble(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java index 6311daf4f..93deacbdf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 4; + public static final byte TYPE_WIDTH = 4; private final FieldReader reader; /** @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java index c45a8d5f5..ed337188b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableSmallIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 2; + public static final byte TYPE_WIDTH = 2; private final FieldReader reader; /** @@ -319,41 +319,6 @@ public void setSafe(int index, int isSet, short value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, short value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setShort(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java index 454a4ac41..d7ea32223 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java @@ -292,40 +292,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java index 8540d1692..07d8abba4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java @@ -294,40 +294,6 @@ public void setSafe(int index, int isSet, int value) { } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java index 015226da2..947b2392f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java index 2b2375e92..0a3cfaac0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java index b2a58bd45..1bf2abc38 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java @@ -165,42 +165,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and * - * JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java index 15100530d..ccbfa32b8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java @@ -318,42 +318,6 @@ public void setSafe(int index, int isSet, byte value) { set(index, isSet, value); } - - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. 
- * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, byte value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setByte(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index c6a82510e..560b0b9c5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -27,15 +27,17 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; import java.util.*; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.*; -import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.file.InvalidArrowFileException; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowVectorType; import org.apache.arrow.vector.types.Types; @@ -183,6 +185,282 @@ public VectorSchemaRoot read() throws IOException { } } + private abstract class BufferReader { + abstract protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException; + + final ArrowBuf 
readBuffer(BufferAllocator allocator, int count) throws IOException { + readToken(START_ARRAY); + ArrowBuf buf = read(allocator, count); + readToken(END_ARRAY); + return buf; + } + } + + private class BufferHelper { + BufferReader BIT = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + final int bufferSize = BitVectorHelper.getValidityBufferSize(count); + ArrowBuf buf = allocator.buffer(bufferSize); + + // C++ integration test fails without this. + buf.setZero(0, bufferSize); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + BitVectorHelper.setValidityBit(buf, i, parser.readValueAs(Boolean.class) ? 1 : 0); + } + + return buf; + } + }; + + BufferReader INT1 = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableTinyIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeByte(parser.getByteValue()); + } + + return buf; + } + }; + + BufferReader INT2 = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableSmallIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeShort(parser.getShortValue()); + } + + return buf; + } + }; + + BufferReader INT4 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeInt(parser.getIntValue()); + } + + return buf; + } + }; + + BufferReader INT8 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableBigIntVector.TYPE_WIDTH); + + for 
(int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeLong(parser.getLongValue()); + } + + return buf; + } + }; + + BufferReader FLOAT4 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableFloat4Vector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeFloat(parser.getFloatValue()); + } + + return buf; + } + }; + + BufferReader FLOAT8 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableFloat8Vector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeDouble(parser.getDoubleValue()); + } + + return buf; + } + }; + + BufferReader DECIMAL = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableDecimalVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = decodeHexSafe(parser.getValueAsString()); + DecimalUtility.writeByteArrayToArrowBuf(value, buf, i); + } + + return buf; + } + }; + + BufferReader VARCHAR = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrayList<byte[]> values = Lists.newArrayList(); + int bufferSize = 0; + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = parser.getValueAsString().getBytes(UTF_8); + values.add(value); + bufferSize += value.length; + + } + + ArrowBuf buf = allocator.buffer(bufferSize); + + for (byte[] value : values) { + buf.writeBytes(value); + } + + return buf; + } + }; + + BufferReader VARBINARY = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrayList<byte[]> values = Lists.newArrayList(); 
+ int bufferSize = 0; + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = decodeHexSafe(parser.readValueAs(String.class)); + values.add(value); + bufferSize += value.length; + + } + + ArrowBuf buf = allocator.buffer(bufferSize); + + for (byte[] value : values) { + buf.writeBytes(value); + } + + return buf; + } + }; + + } + + private ArrowBuf readBuffer(BufferAllocator allocator, ArrowVectorType bufferType, Types.MinorType type, int count) throws IOException { + ArrowBuf buf; + + BufferHelper helper = new BufferHelper(); + + BufferReader reader = null; + + if (bufferType.equals(VALIDITY)) { + reader = helper.BIT; + } else if (bufferType.equals(OFFSET)) { + reader = helper.INT4; + } else if (bufferType.equals(TYPE)) { + reader = helper.INT1; + } else if (bufferType.equals(DATA)) { + switch (type) { + case BIT: + reader = helper.BIT; + break; + case TINYINT: + reader = helper.INT1; + break; + case SMALLINT: + reader = helper.INT2; + break; + case INT: + reader = helper.INT4; + break; + case BIGINT: + reader = helper.INT8; + break; + case UINT1: + reader = helper.INT1; + break; + case UINT2: + reader = helper.INT2; + break; + case UINT4: + reader = helper.INT4; + break; + case UINT8: + reader = helper.INT8; + break; + case FLOAT4: + reader = helper.FLOAT4; + break; + case FLOAT8: + reader = helper.FLOAT8; + break; + case DECIMAL: + reader = helper.DECIMAL; + break; + case VARCHAR: + reader = helper.VARCHAR; + break; + case VARBINARY: + reader = helper.VARBINARY; + break; + case DATEDAY: + reader = helper.INT4; + break; + case DATEMILLI: + reader = helper.INT8; + break; + case TIMESEC: + case TIMEMILLI: + reader = helper.INT4; + break; + case TIMEMICRO: + case TIMENANO: + reader = helper.INT8; + break; + case TIMESTAMPNANO: + case TIMESTAMPMICRO: + case TIMESTAMPMILLI: + case TIMESTAMPSEC: + case TIMESTAMPNANOTZ: + case TIMESTAMPMICROTZ: + case TIMESTAMPMILLITZ: + case TIMESTAMPSECTZ: + reader = helper.INT8; + break; + default: + throw new 
UnsupportedOperationException("Cannot read array of type " + type); + } + } else { + throw new InvalidArrowFileException("Unrecognized buffer type " + bufferType); + } + + buf = reader.readBuffer(allocator, count); + assert buf != null; + return buf; + } + private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { List<ArrowVectorType> vectorTypes = field.getTypeLayout().getVectorTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; @@ -209,7 +487,7 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name); } - /* Initialize the vector with required capacity but don't allocate since we would + /* Initialize the vector with required capacity but don't allocateNew since we would * be doing loadFieldBuffers. */ int valueCount = readNextField("count", Integer.class); @@ -218,29 +496,13 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json for (int v = 0; v < vectorTypes.size(); v++) { ArrowVectorType vectorType = vectorTypes.get(v); nextFieldIs(vectorType.getName()); - readToken(START_ARRAY); int innerBufferValueCount = valueCount; if (vectorType.equals(OFFSET)) { /* offset buffer has 1 additional value capacity */ innerBufferValueCount = valueCount + 1; } - for (int i = 0; i < innerBufferValueCount; i++) { - /* write data to the buffer */ - parser.nextToken(); - /* for variable width vectors, value count doesn't help pre-determining the capacity of - * the underlying data buffer. So we need to pass down the offset buffer (which was already - * populated in the previous iteration of this loop). 
- */ - if (vectorType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR - || vector.getMinorType() == Types.MinorType.VARBINARY)) { - vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], - vectorBuffers[v-1], i, innerBufferValueCount); - } else { - vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], - null, i, innerBufferValueCount); - } - } - readToken(END_ARRAY); + + vectorBuffers[v] = readBuffer(allocator, vectorType, vector.getMinorType(), innerBufferValueCount); } vector.loadFieldBuffers(new ArrowFieldNode(valueCount, 0), Arrays.asList(vectorBuffers)); @@ -250,7 +512,8 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json if (!fields.isEmpty()) { List<FieldVector> vectorChildren = vector.getChildrenFromFields(); if (fields.size() != vectorChildren.size()) { - throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size()); + throw new IllegalArgumentException( + "fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size()); } nextFieldIs("children"); readToken(START_ARRAY); @@ -277,125 +540,6 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json } } - private ArrowBuf setValueFromParser(ArrowVectorType bufferType, FieldVector vector, - ArrowBuf buffer, ArrowBuf offsetBuffer, int index, int valueCount) throws IOException { - if (bufferType.equals(TYPE)) { - buffer = NullableTinyIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Byte.class)); - } else if (bufferType.equals(OFFSET)) { - buffer = BaseNullableVariableWidthVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - } else if (bufferType.equals(VALIDITY)) { - buffer = BitVectorHelper.setValidityBit(buffer, allocator, - valueCount, index, parser.readValueAs(Boolean.class) ? 
1 : 0); - } else if (bufferType.equals(DATA)) { - switch (vector.getMinorType()) { - case BIT: - buffer = BitVectorHelper.setValidityBit(buffer, allocator, - valueCount, index, parser.readValueAs(Boolean.class) ? 1 : 0); - break; - case TINYINT: - buffer = NullableTinyIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Byte.class)); - break; - case SMALLINT: - buffer = NullableSmallIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Short.class)); - break; - case INT: - buffer = NullableIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case BIGINT: - buffer = NullableBigIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case FLOAT4: - buffer = NullableFloat4Vector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Float.class)); - break; - case FLOAT8: - buffer = NullableFloat8Vector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Double.class)); - break; - case DECIMAL: - buffer = NullableDecimalVector.set(buffer, allocator, - valueCount, index, decodeHexSafe(parser.readValueAs(String.class))); - break; - case VARBINARY: - assert (offsetBuffer != null); - buffer = BaseNullableVariableWidthVector.set(buffer, offsetBuffer, allocator, index, - decodeHexSafe(parser.readValueAs(String.class)), valueCount); - break; - case VARCHAR: - assert (offsetBuffer != null); - buffer = BaseNullableVariableWidthVector.set(buffer, offsetBuffer, allocator, index, - parser.readValueAs(String.class).getBytes(UTF_8), valueCount); - break; - case DATEDAY: - buffer = NullableDateDayVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case DATEMILLI: - buffer = NullableDateMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESEC: - buffer = NullableTimeSecVector.set(buffer, allocator, - valueCount, index, 
parser.readValueAs(Integer.class)); - break; - case TIMEMILLI: - buffer = NullableTimeMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case TIMEMICRO: - buffer = NullableTimeMicroVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMENANO: - buffer = NullableTimeNanoVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPSEC: - buffer = NullableTimeStampSecVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMILLI: - buffer = NullableTimeStampMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMICRO: - buffer = NullableTimeStampMicroVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPNANO: - buffer = NullableTimeStampNanoVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPSECTZ: - buffer = NullableTimeStampSecTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMILLITZ: - buffer = NullableTimeStampMilliTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMICROTZ: - buffer = NullableTimeStampMicroTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPNANOTZ: - buffer = NullableTimeStampNanoTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - default: - throw new UnsupportedOperationException("minor type: " + vector.getMinorType()); - } - } - - return buffer; - } - @Override public void close() throws IOException { parser.close(); ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > [Java] Remove public static helper method in vector classes for JSONReader/Writer > --------------------------------------------------------------------------------- > > Key: ARROW-1717 > URL: https://issues.apache.org/jira/browse/ARROW-1717 > Project: Apache Arrow > Issue Type: Sub-task > Reporter: Li Jin > Assignee: Li Jin > Labels: pull-request-available > Fix For: 0.8.0 > > -- This message was sent by Atlassian JIRA (v6.4.14#64029)