[ 
https://issues.apache.org/jira/browse/PARQUET-1647?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17776254#comment-17776254
 ] 

ASF GitHub Bot commented on PARQUET-1647:
-----------------------------------------

wgtmac commented on code in PR #1142:
URL: https://github.com/apache/parquet-mr/pull/1142#discussion_r1362335145


##########
parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestFloat16Statistics.java:
##########
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.statistics;
+
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.GroupFactory;
+import org.apache.parquet.example.data.simple.SimpleGroupFactory;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Types;
+import org.apache.parquet.type.Float16;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.hadoop.example.ExampleParquetWriter;
+import org.apache.parquet.hadoop.example.GroupWriteSupport;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+
+import static org.apache.parquet.schema.LogicalTypeAnnotation.float16Type;
+import static 
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+import static org.junit.Assert.assertEquals;
+
+public class TestFloat16Statistics {
+
+  @Rule
+  public TemporaryFolder temp = new TemporaryFolder();
+
+  private short[] valuesInAscendingOrder = {
+    (short) 0xfc00, // -Infinity
+    (short) 0xc000, // -2.0
+    -Float16.MAX_VALUE, // -6.109476E-5
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.POSITIVE_ZERO, // +0
+    Float16.MIN_VALUE, // 5.9604645E-8
+    Float16.MAX_VALUE, // 65504.0
+    (short) 0x7c00}; // Infinity
+
+  private short[] valuesInAscendingOrderMinMax = {
+    (short) 0xfc00, // -Infinity
+    (short) 0x7c00}; // Infinity
+
+  private short[] valuesInDescendingOrder = {
+    (short) 0x7c00, // Infinity
+    Float16.MAX_VALUE, // 65504.0
+    Float16.MIN_VALUE, // 5.9604645E-8
+    Float16.POSITIVE_ZERO, // +0
+    Float16.NEGATIVE_ZERO, // -0
+    -Float16.MAX_VALUE, // -6.109476E-5
+    (short) 0xc000, // -2.0
+    (short) 0xfc00}; // -Infinity
+
+  private short[] valuesInDescendingOrderMinMax = {
+    (short) 0xfc00, // -Infinity
+    (short) 0x7c00}; // Infinity
+
+  private short[] valuesUndefinedOrder = {
+    Float16.MAX_VALUE, // 65504.0
+    (short) 0x7c00, // Infinity
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.MIN_VALUE, // 5.9604645E-8
+    Float16.POSITIVE_ZERO, // +0
+    (short) 0xc000, // -2.0
+    -Float16.MAX_VALUE, // -6.109476E-5
+    (short) 0xfc00}; // -Infinity
+
+  private short[] valuesUndefinedOrderMinMax = {
+    (short) 0xfc00, // -Infinity
+    (short) 0x7c00}; // Infinity
+
+  private short[] valuesAllPositiveZero = {
+    Float16.POSITIVE_ZERO, // +0
+    Float16.POSITIVE_ZERO, // +0
+    Float16.POSITIVE_ZERO, // +0
+    Float16.POSITIVE_ZERO}; // +0
+
+  private short[] valuesAllPositiveZeroMinMax = {
+    Float16.POSITIVE_ZERO, // +0
+    Float16.POSITIVE_ZERO}; // +0
+
+  // Float16Statistics: Updating min to -0.0 to ensure that no 0.0 values would be skipped
+  private short[] valuesAllPositiveStatsZeroMinMax = {
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.POSITIVE_ZERO}; // +0
+
+  private short[] valuesAllNegativeZero = {
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.NEGATIVE_ZERO}; // -0
+
+  private short[] valuesAllNegativeZeroMinMax = {
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.NEGATIVE_ZERO}; // -0
+
+  // Float16Statistics: Updating max to +0.0 to ensure that no 0.0 values would be skipped
+  private short[] valuesAllNegativeStatsZeroMinMax = {
+    Float16.NEGATIVE_ZERO, // -0
+    Float16.POSITIVE_ZERO}; // +0
+
+  private short[] valuesWithNaN = {
+    (short) 0xc000, // -2.0
+    Float16.MAX_VALUE, // 65504.0
+    Float16.MIN_VALUE, // 5.9604645E-8
+    Float16.NaN}; // NaN
+
+  private short[] valuesWithNaNMinMax = {
+    (short) 0xc000, // -2.0
+    Float16.NaN}; // NaN
+
+  // Float16Statistics: Drop min/max values in case of NaN as the sorting order of values is undefined
+  private short[] valuesWithNaNStatsMinMax = {
+    Float16.POSITIVE_ZERO, // +0
+    Float16.POSITIVE_ZERO}; // +0
+
+  @Test
+  public void testFloat16ColumnIndex() throws IOException {

Review Comment:
   It would be good to move this test and the ones below it to a new file called 
`TestFloat16ReadWriteRoundTrip.java`, so that this file tests only the 
Float16Statistics class.



##########
parquet-common/src/main/java/org/apache/parquet/type/Float16.java:
##########
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.type;
+
+import java.util.Arrays;
+
+/**
+ * The class is a utility class to manipulate half-precision 16-bit
+ * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
+ * floating point data types (also called fp16 or binary16). A half-precision float can be
+ * created from or converted to single-precision floats, and is stored in a short data type.
+ * The IEEE 754 standard specifies a float16 as having the following format:
+ * <ul>
+ * <li>Sign bit: 1 bit</li>
+ * <li>Exponent width: 5 bits</li>
+ * <li>Significand: 10 bits</li>
+ * </ul>
+ *
+ * <p>The format is laid out as follows:</p>
+ * <pre>
+ * 1   11111   1111111111
+ * ^   --^

> [Java] support for Arrow's float16
> ----------------------------------
>
>                 Key: PARQUET-1647
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1647
>             Project: Parquet
>          Issue Type: Improvement
>          Components: parquet-format, parquet-thrift
>            Reporter: The Alchemist
>            Priority: Minor
>
> h2. DESCRIPTION
>  
> I'm wondering if there's any interest in supporting Arrow's {{float16}} type 
> in Parquet.
> There seem to be one or two {{float16}} / {{halffloat}} tickets here (e.g., 
> PARQUET-1403) but nothing that speaks to adding half-float support to Parquet 
> in general.
>  
> h2. PLANS
> I'm able to spend some time on this, if someone points me in the right 
> direction.
>  
>  # Add the {{HALFFLOAT}} or {{FLOAT16}} enum (any preferred naming 
> convention?) to 
> [https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift#L32]
>  # Add {{HALFFLOAT}} to {{org.apache.parquet.schema.PrimitiveType}}
>  # Add {{HALFFLOAT}} support to 
> {{org.apache.parquet.arrow.schema.SchemaConverter}}
>  # Add encoding for new type at {{org.apache.parquet.column.Encoding}}
>  # ??
> If anyone has any interest in this, pointers, or comments, they would be 
> greatly appreciated!



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to