This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2a87693134 GH-39680: [Java] enable half float support on Java module
(#39681)
2a87693134 is described below
commit 2a87693134135a8af2ae2b6df41980176431b1c0
Author: david dali susanibar arce <[email protected]>
AuthorDate: Wed Jan 31 13:38:54 2024 -0500
GH-39680: [Java] enable half float support on Java module (#39681)
### Rationale for this change
- To enable half float support on Java module.
### What changes are included in this PR?
- [x] Add initial Float16 type support
- [x] Unit test
- [x] Integration test
- [x] Documentation
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No
* Closes: #39680
Authored-by: david dali susanibar arce <[email protected]>
Signed-off-by: David Li <[email protected]>
---
docs/source/status.rst | 9 +-
.../org/apache/arrow/dataset/TestAllTypes.java | 6 +-
.../java/org/apache/arrow/memory/util/Float16.java | 271 +++++++++++++
.../java/org/apache/arrow/memory/TestArrowBuf.java | 11 +
.../src/main/codegen/data/ValueVectorTypes.tdd | 10 +
.../src/main/codegen/templates/UnionReader.java | 6 +-
.../java/org/apache/arrow/vector/Float2Vector.java | 434 +++++++++++++++++++++
.../java/org/apache/arrow/vector/types/Types.java | 16 +-
.../org/apache/arrow/vector/TestValueVector.java | 198 ++++++++++
9 files changed, 953 insertions(+), 8 deletions(-)
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 03a8701234..11dd9c2c29 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -40,7 +40,7 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓
| ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Float16 | ✓ (1) | | ✓ | ✓ | ✓ (2)| ✓ | ✓
| |
+| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓
| ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -104,7 +104,7 @@ Data Types
| Data type | C++ | Java | Go | JavaScript | C# | Rust |
Julia | Swift |
| (special) | | | | | | |
| |
+===================+=======+=======+=======+============+=======+=======+=======+=======+
-| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓
| |
+| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (3) | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Extension | ✓ | ✓ | ✓ | | | ✓ | ✓
| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -114,8 +114,9 @@ Data Types
Notes:
* \(1) Casting to/from Float16 in C++ is not supported.
-* \(2) Float16 support in C# is only available when targeting .NET 6+.
-* \(3) Nested dictionaries not supported
+* \(2) Casting to/from Float16 in Java is not supported.
+* \(3) Float16 support in C# is only available when targeting .NET 6+.
+* \(4) Nested dictionaries not supported
.. seealso::
The :ref:`format_columnar` specification.
diff --git
a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
index 13b2474523..6d33cf057e 100644
--- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java
@@ -32,6 +32,7 @@ import java.util.Objects;
import org.apache.arrow.dataset.file.DatasetFileWriter;
import org.apache.arrow.dataset.file.FileFormat;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.Float16;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateMilliVector;
@@ -39,6 +40,7 @@ import org.apache.arrow.vector.Decimal256Vector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.DurationVector;
import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
@@ -89,7 +91,6 @@ public class TestAllTypes extends TestDataset {
private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) {
// Notes:
- // - Float16 is not supported by Java.
// - IntervalMonthDayNano is not supported by Parquet.
// - Map (GH-38250) and SparseUnion are resulting in serialization errors
when writing with the Dataset API.
// "Unhandled type for Arrow to Parquet schema conversion" errors:
IntervalDay, IntervalYear, DenseUnion
@@ -109,6 +110,7 @@ public class TestAllTypes extends TestDataset {
Field.nullablePrimitive("uint16", new ArrowType.Int(16, false)),
Field.nullablePrimitive("uint32", new ArrowType.Int(32, false)),
Field.nullablePrimitive("uint64", new ArrowType.Int(64, false)),
+ Field.nullablePrimitive("float16", new
ArrowType.FloatingPoint(FloatingPointPrecision.HALF)),
Field.nullablePrimitive("float32", new
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
Field.nullablePrimitive("float64", new
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
Field.nullablePrimitive("utf8", ArrowType.Utf8.INSTANCE),
@@ -148,6 +150,7 @@ public class TestAllTypes extends TestDataset {
root.getVector("uint16").setNull(0);
root.getVector("uint32").setNull(0);
root.getVector("uint64").setNull(0);
+ root.getVector("float16").setNull(0);
root.getVector("float32").setNull(0);
root.getVector("float64").setNull(0);
root.getVector("utf8").setNull(0);
@@ -180,6 +183,7 @@ public class TestAllTypes extends TestDataset {
((UInt2Vector) root.getVector("uint16")).set(1, 1);
((UInt4Vector) root.getVector("uint32")).set(1, 1);
((UInt8Vector) root.getVector("uint64")).set(1, 1);
+ ((Float2Vector) root.getVector("float16")).set(1,
Float16.toFloat16(+32.875f));
((Float4Vector) root.getVector("float32")).set(1, 1.0f);
((Float8Vector) root.getVector("float64")).set(1, 1.0);
((VarCharVector) root.getVector("utf8")).set(1, new Text("a"));
diff --git
a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java
new file mode 100644
index 0000000000..8040158fd0
--- /dev/null
+++
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+
+import org.apache.arrow.util.VisibleForTesting;
+
+/**
+ * Lifted from Apache Parquet MR project:
+ *
https://github.com/apache/parquet-mr/blob/e87b80308869b77f914fcfd04364686e11158950/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java
+ * Changes made:
+ * <ul>
+ * <li>Modify the data type input from Parquet-MR Binary (toFloat(Binary b))
to Arrow Java short (toFloat(short b))</li>
+ * <li>Expose the NaN and POSITIVE_INFINITY constants</li>
+ * </ul>
+ *
+ *
+ * The class is a utility class to manipulate half-precision 16-bit
+ * <a
href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE
754</a>
+ * floating point data types (also called fp16 or binary16). A half-precision
float can be
+ * created from or converted to single-precision floats, and is stored in a
short data type.
+ * The IEEE 754 standard specifies a float16 as having the following format:
+ * <ul>
+ * <li>Sign bit: 1 bit</li>
+ * <li>Exponent width: 5 bits</li>
+ * <li>Significand: 10 bits</li>
+ * </ul>
+ *
+ * <p>The format is laid out as follows:</p>
+ * <pre>
+ * 1 11111 1111111111
+ * ^ --^-- -----^----
+ * sign | |_______ significand
+ * |
+ * -- exponent
+ * </pre>
+ * Half-precision floating points can be useful to save memory and/or
+ * bandwidth at the expense of range and precision when compared to
single-precision
+ * floating points (float32).
+ * Ref:
https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java
+ */
+public class Float16 {
+ // Positive infinity of type half-precision float.
+ public static final short POSITIVE_INFINITY = (short) 0x7c00;
+ // A Not-a-Number representation of a half-precision float.
+ public static final short NaN = (short) 0x7e00;
+ // The bitmask to AND a number with to obtain the sign bit.
+ private static final int SIGN_MASK = 0x8000;
+ // The offset to shift by to obtain the exponent bits.
+ private static final int EXPONENT_SHIFT = 10;
+ // The bitmask to AND a number shifted by EXPONENT_SHIFT right, to obtain
exponent bits.
+ private static final int SHIFTED_EXPONENT_MASK = 0x1f;
+ // The bitmask to AND a number with to obtain significand bits.
+ private static final int SIGNIFICAND_MASK = 0x3ff;
+ // The offset of the exponent from the actual value.
+ private static final int EXPONENT_BIAS = 15;
+ // The offset to shift by to obtain the sign bit.
+ private static final int SIGN_SHIFT = 15;
+ // The bitmask to AND with to obtain exponent and significand bits.
+ private static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff;
+
+ private static final int FP32_SIGN_SHIFT = 31;
+ private static final int FP32_EXPONENT_SHIFT = 23;
+ private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff;
+ private static final int FP32_SIGNIFICAND_MASK = 0x7fffff;
+ private static final int FP32_EXPONENT_BIAS = 127;
+ private static final int FP32_QNAN_MASK = 0x400000;
+ private static final int FP32_DENORMAL_MAGIC = 126 << 23;
+ private static final float FP32_DENORMAL_FLOAT =
Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
+
+ /**
+ * Returns true if the specified half-precision float value represents
+ * a Not-a-Number, false otherwise.
+ *
+ * @param h A half-precision float value
+ * @return True if the value is a NaN, false otherwise
+ *
+ */
+ @VisibleForTesting
+ public static boolean isNaN(short h) {
+ return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY;
+ }
+
+ /**
+ * <p>Compares the two specified half-precision float values. The following
+ * conditions apply during the comparison:</p>
+ *
+ * <ul>
+ * <li>NaN is considered by this method to be equal to itself and greater
+ * than all other half-precision float values (including {@link
#POSITIVE_INFINITY})</li>
+ * <li>POSITIVE_ZERO is considered by this method to be greater than
NEGATIVE_ZERO.</li>
+ * </ul>
+ *
+ * @param x The first half-precision float value to compare.
+ * @param y The second half-precision float value to compare
+ *
+ * @return The value {@code 0} if {@code x} is numerically equal to {@code
y}, a
+ * value less than {@code 0} if {@code x} is numerically less than
{@code y},
+ * and a value greater than {@code 0} if {@code x} is numerically
greater
+ * than {@code y}
+ *
+ */
+ @VisibleForTesting
+ public static int compare(short x, short y) {
+ boolean xIsNaN = isNaN(x);
+ boolean yIsNaN = isNaN(y);
+
+ if (!xIsNaN && !yIsNaN) {
+ int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff);
+ int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
+ // Return -1 when the first half-precision float value is less
+ // (smaller toward negative infinity) than the second half-precision
float value.
+ if (first < second) {
+ return -1;
+ }
+
+ // Return 1 when the first half-precision float value is greater
+ // (larger toward positive infinity) than the second half-precision
float value.
+ if (first > second) {
+ return 1;
+ }
+ }
+
+ // Collapse NaNs, akin to halfToIntBits(), but we want to keep
+ // (signed) short value types to preserve the ordering of -0.0
+ // and +0.0
+ short xBits = xIsNaN ? NaN : x;
+ short yBits = yIsNaN ? NaN : y;
+ return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
+ }
+
+ /**
+ * Converts the specified half-precision float value into a
+ * single-precision float value. The following special cases are handled:
+ * If the input is NaN, the returned value is Float NaN.
+ * If the input is POSITIVE_INFINITY or NEGATIVE_INFINITY, the returned
value is respectively
+ * Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY.
+ * If the input is 0 (positive or negative), the returned value is +/-0.0f.
+ * Otherwise, the returned value is a normalized single-precision float
value.
+ *
+ * @param b The half-precision float value to convert to single-precision
+ * @return A normalized single-precision float value
+ */
+ @VisibleForTesting
+ public static float toFloat(short b) {
+ int bits = b & 0xffff;
+ int s = bits & SIGN_MASK;
+ int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
+ int m = (bits) & SIGNIFICAND_MASK;
+ int outE = 0;
+ int outM = 0;
+ if (e == 0) { // Denormal or 0
+ if (m != 0) {
+ // Convert denorm fp16 into normalized fp32
+ float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
+ o -= FP32_DENORMAL_FLOAT;
+ return s == 0 ? o : -o;
+ }
+ } else {
+ outM = m << 13;
+ if (e == 0x1f) { // Infinite or NaN
+ outE = 0xff;
+ if (outM != 0) { // SNaNs are quieted
+ outM |= FP32_QNAN_MASK;
+ }
+ } else {
+ outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS;
+ }
+ }
+ int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
+ return Float.intBitsToFloat(out);
+ }
+
+ /**
+ * Converts the specified single-precision float value into a
+ * half-precision float value. The following special cases are handled:
+ *
+ * If the input is NaN, the returned value is NaN.
+ * If the input is Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY,
+ * the returned value is respectively POSITIVE_INFINITY or
NEGATIVE_INFINITY.
+ * If the input is 0 (positive or negative), the returned value is
+ * POSITIVE_ZERO or NEGATIVE_ZERO.
+ * If the input is less than MIN_VALUE, the returned value
+ * is flushed to POSITIVE_ZERO or NEGATIVE_ZERO.
+ * If the input is less than MIN_NORMAL, the returned value
+ * is a denorm half-precision float.
+ * Otherwise, the returned value is rounded to the nearest
+ * representable half-precision float value.
+ *
+ * @param f The single-precision float value to convert to half-precision
+ * @return A half-precision float value
+ */
+ public static short toFloat16(float f) {
+ int bits = Float.floatToRawIntBits(f);
+ int s = (bits >>> FP32_SIGN_SHIFT);
+ int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK;
+ int m = (bits) & FP32_SIGNIFICAND_MASK;
+ int outE = 0;
+ int outM = 0;
+ if (e == 0xff) { // Infinite or NaN
+ outE = 0x1f;
+ outM = m != 0 ? 0x200 : 0;
+ } else {
+ e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS;
+ if (e >= 0x1f) { // Overflow
+ outE = 0x1f;
+ } else if (e <= 0) { // Underflow
+ if (e < -10) {
+ // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
+ } else {
+ // The fp32 value is a normalized float less than MIN_NORMAL,
+ // we convert to a denorm fp16
+ m = m | 0x800000;
+ int shift = 14 - e;
+ outM = m >> shift;
+ int lowm = m & ((1 << shift) - 1);
+ int hway = 1 << (shift - 1);
+ // if above halfway or exactly halfway and outM is odd
+ if (lowm + (outM & 1) > hway) {
+ // Round to nearest even
+ // Can overflow into exponent bit, which surprisingly is OK.
+ // This increment relies on the +outM in the return statement below
+ outM++;
+ }
+ }
+ } else {
+ outE = e;
+ outM = m >> 13;
+ // if above halfway or exactly halfway and outM is odd
+ if ((m & 0x1fff) + (outM & 0x1) > 0x1000) {
+ // Round to nearest even
+ // Can overflow into exponent bit, which surprisingly is OK.
+ // This increment relies on the +outM in the return statement below
+ outM++;
+ }
+ }
+ }
+ // The outM is added here as the +1 increments for outM above can
+ // cause an overflow in the exponent bit which is OK.
+ return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM);
+ }
+
+ /**
+ * Returns a string representation of the specified half-precision
+ * float value. Calling this method is equivalent to calling
+ * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)}
+ * for more information on the format of the string representation.
+ *
+ * @param h A half-precision float value
+ * @return A string representation of the specified value
+ */
+ @VisibleForTesting
+ public static String toFloatString(short h) {
+ return Float.toString(Float16.toFloat(h));
+ }
+}
diff --git
a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
index 9ba42abc1c..b4385b72a3 100644
---
a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
+++
b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
@@ -29,6 +29,7 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
+import org.apache.arrow.memory.util.Float16;
import org.junit.Test;
import org.slf4j.LoggerFactory;
@@ -180,4 +181,14 @@ public class TestArrowBuf {
((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null);
}
}
+
+ @Test
+ public void testArrowBufFloat16() {
+ try (BufferAllocator allocator = new RootAllocator();
+ ArrowBuf buf = allocator.buffer(1024)
+ ) {
+ buf.setShort(0, Float16.toFloat16(+32.875f));
+ assertEquals((short) 0x501c, buf.getShort(0));
+ }
+ }
}
diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
index 2a92180420..6c2a967712 100644
--- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
+++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -49,6 +49,16 @@
{ class: "SmallInt", valueHolder: "Int2Holder"},
]
},
+ {
+ major: "Fixed",
+ width: 2,
+ javaType: "short",
+ boxedType: "Short",
+ fields: [{name: "value", type: "short"}],
+ minor: [
+ { class: "Float2", valueHolder: "Int2Holder"},
+ ]
+ },
{
major: "Fixed",
width: 4,
diff --git a/java/vector/src/main/codegen/templates/UnionReader.java
b/java/vector/src/main/codegen/templates/UnionReader.java
index 56a6cc90b3..822d482298 100644
--- a/java/vector/src/main/codegen/templates/UnionReader.java
+++ b/java/vector/src/main/codegen/templates/UnionReader.java
@@ -39,7 +39,9 @@ package org.apache.arrow.vector.complex.impl;
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {
- private BaseReader[] readers = new BaseReader[45];
+ private static final int NUM_SUPPORTED_TYPES = 46;
+
+ private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
public UnionReader(UnionVector data) {
@@ -50,7 +52,7 @@ public class UnionReader extends AbstractFieldReader {
return TYPES[data.getTypeValue(idx())];
}
- private static MinorType[] TYPES = new MinorType[45];
+ private static MinorType[] TYPES = new MinorType[NUM_SUPPORTED_TYPES];
static {
for (MinorType minorType : MinorType.values()) {
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
new file mode 100644
index 0000000000..9d3f25769a
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
@@ -0,0 +1,434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.Float16;
+import org.apache.arrow.vector.complex.impl.Float2ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float2Holder;
+import org.apache.arrow.vector.holders.NullableFloat2Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float2Vector implements a fixed width (2 bytes) vector of half-precision
+ * float values, stored as shorts, which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float2Vector extends BaseFixedWidthVector implements
FloatingPointVector {
+ public static final byte TYPE_WIDTH = 2;
+
+ /**
+ * Instantiate a Float2Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float2Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(String name, FieldType fieldType, BufferAllocator
allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float2Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ }
+
+ @Override
+ protected FieldReader getReaderImpl() {
+ return new Float2ReaderImpl(Float2Vector.this);
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT2;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public short get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableFloat2Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}, except that a null element is returned as {@code null}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ @Override
+ public Short getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ static short get(final ArrowBuf buffer, final int index) {
+ return buffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ @Override
+ public double getValueAsDouble(int index) {
+ return getValueAsFloat(index);
+ }
+
+ public float getValueAsFloat(int index) {
+ return Float16.toFloat(this.get(index));
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ private void setValue(int index, short value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, float value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value));
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, short value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setWithPossibleTruncate(int index, float value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, the element
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableFloat2Holder holder) throws
IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Float2Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, short)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, short value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #setWithPossibleTruncate(int, float)} except that it
handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafeWithPossibleTruncate(int index, float value) {
+ handleSafe(index);
+ setWithPossibleTruncate(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFloat2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableFloat2Holder holder) throws
IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Float2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Float2Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet
indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, short value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet
indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setWithPossibleTruncate(int index, int isSet, float value) {
+ if (isSet > 0) {
+ setWithPossibleTruncate(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, short)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, short value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Same as {@link #setWithPossibleTruncate(int, int, float)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafeWithPossibleTruncate(int index, int isSet, float value) {
+ handleSafe(index);
+ setWithPossibleTruncate(index, isSet, value);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, double value) {
+ throw new UnsupportedOperationException("The operation for double data
types is not supported.");
+ }
+
+ @Override
+ public void setSafeWithPossibleTruncate(int index, double value) {
+ throw new UnsupportedOperationException("The operation for double data
types is not supported.");
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair comprising this and a target vector of
+ * the same type.
+ *
+ * @param field Field object used by the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
+ return new TransferImpl(field, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Float2Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Float2Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Float2Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(Field field, BufferAllocator allocator) {
+ to = new Float2Vector(field, allocator);
+ }
+
+ public TransferImpl(Float2Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Float2Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Float2Vector.this);
+ }
+ }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index f29157524f..0b0e0d66a9 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -18,6 +18,7 @@
package org.apache.arrow.vector.types;
import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.HALF;
import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
import static org.apache.arrow.vector.types.UnionMode.Dense;
import static org.apache.arrow.vector.types.UnionMode.Sparse;
@@ -33,6 +34,7 @@ import org.apache.arrow.vector.DurationVector;
import org.apache.arrow.vector.ExtensionTypeVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
@@ -79,6 +81,7 @@ import org.apache.arrow.vector.complex.impl.DecimalWriterImpl;
import org.apache.arrow.vector.complex.impl.DenseUnionWriter;
import org.apache.arrow.vector.complex.impl.DurationWriterImpl;
import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.Float2WriterImpl;
import org.apache.arrow.vector.complex.impl.Float4WriterImpl;
import org.apache.arrow.vector.complex.impl.Float8WriterImpl;
import org.apache.arrow.vector.complex.impl.IntWriterImpl;
@@ -432,6 +435,17 @@ public class Types {
return new IntervalYearWriterImpl((IntervalYearVector) vector);
}
},
+ // 2 byte ieee 754
+ FLOAT2(new FloatingPoint(HALF)) {
+ @Override
+ public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ // schemaChangeCallback is not used when creating this vector.
+ return new Float2Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ final Float2Vector float2Vector = (Float2Vector) vector;
+ return new Float2WriterImpl(float2Vector);
+ }
+ },
// 4 byte ieee 754
FLOAT4(new FloatingPoint(SINGLE)) {
@Override
@@ -894,7 +908,7 @@ public class Types {
public MinorType visit(FloatingPoint type) {
switch (type.getPrecision()) {
case HALF:
- throw new UnsupportedOperationException("NYI: " + type);
+ return MinorType.FLOAT2;
case SINGLE:
return MinorType.FLOAT4;
case DOUBLE:
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 614aff18d4..10091aebdd 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -332,6 +332,204 @@ public class TestValueVector {
}
}
+ @Test /* Float2Vector */
+ public void testFixedFloat2() {
+ try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ /* a capacity of four times MAX_VALUE_COUNT is expected to be rejected */
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 32 bytes (16 * 2) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ /* from here on, work with the capacity that was actually allocated */
+ initialCapacity = floatVector.getValueCapacity();
+
+ floatVector.zeroVector();
+
+ /* populate the even slots of floatVector; each short is the 16-bit encoding named in its trailing comment */
+ floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f)
+ floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f)
+ floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f)
+ floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f)
+ floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f)
+ floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f)
+ floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f)
+ floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f)
+
+ /* set() at an index equal to the capacity is expected to fail */
+ try {
+ floatVector.set(initialCapacity, (short) 0x141c);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals((short) 0x901d, floatVector.get(6));
+ assertEquals((short) 0x121c, floatVector.get(8));
+ assertEquals((short) 0x921c, floatVector.get(10));
+ assertEquals((short) 0x501c, floatVector.get(12));
+ assertEquals((short) 0xd01c, floatVector.get(14));
+
+ /* get() at an index equal to the capacity is expected to fail as well */
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f)
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals((short) 0x901d, floatVector.get(6));
+ assertEquals((short) 0x121c, floatVector.get(8));
+ assertEquals((short) 0x921c, floatVector.get(10));
+ assertEquals((short) 0x501c, floatVector.get(12));
+ assertEquals((short) 0xd01c, floatVector.get(14));
+ assertEquals((short) 0x141c, floatVector.get(initialCapacity));
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* after reset every slot up to the previous capacity should read as null */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* Float2Vector */
+ public void testFixedFloat2WithPossibleTruncate() {
+ try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ /* a capacity of four times MAX_VALUE_COUNT is expected to be rejected */
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 32 bytes (16 * 2) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ /* from here on, work with the capacity that was actually allocated */
+ initialCapacity = floatVector.getValueCapacity();
+
+ floatVector.zeroVector();
+
+ /* populate the even slots: raw 16-bit encodings first, then float inputs that lose precision when stored */
+ floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f)
+ floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f)
+ floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f)
+ floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048
+ floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096
+ floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192
+ floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384
+ floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768
+
+ /* setWithPossibleTruncate() at an index equal to the capacity is expected to fail */
+ try {
+ floatVector.setWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents; truncated entries are read back as float/double and compared exactly (delta 0) */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0);
+ assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0);
+ assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0);
+ assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0);
+ assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0);
+
+ /* get() at an index equal to the capacity is expected to fail as well */
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafeWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals((short) 0x101c, floatVector.get(0));
+ assertEquals((short) 0x901c, floatVector.get(2));
+ assertEquals((short) 0x101d, floatVector.get(4));
+ assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0);
+ assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0);
+ assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0);
+ assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0);
+ assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0);
+ assertEquals(1.6181641f, floatVector.getValueAsDouble(initialCapacity), 0);
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* after reset every slot up to the previous capacity should read as null */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i));
+ }
+ }
+ }
+
@Test /* Float4Vector */
public void testFixedType3() {
try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH,
allocator)) {