[ 
https://issues.apache.org/jira/browse/PARQUET-1647?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17778553#comment-17778553
 ] 

ASF GitHub Bot commented on PARQUET-1647:
-----------------------------------------

gszadovszky commented on code in PR #1142:
URL: https://github.com/apache/parquet-mr/pull/1142#discussion_r1368247647


##########
parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java:
##########
@@ -150,26 +150,26 @@ public Float16Builder(PrimitiveType type) {
 
     @Override
     public Statistics<?> build() {
-      Float16Statistics stats = (Float16Statistics) super.build();
+      BinaryStatistics stats = (BinaryStatistics) super.build();
       if (stats.hasNonNullValue()) {
         Binary bMin = stats.genericGetMin();
         Binary bMax = stats.genericGetMax();
         short min = bMin.get2BytesLittleEndian();
         short max = bMax.get2BytesLittleEndian();
         // Drop min/max values in case of NaN as the sorting order of values 
is undefined for this case
         if (Float16.isNaN(min) || Float16.isNaN(max)) {
-          bMin = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
-          bMax = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
+          bMin = Binary.fromConstantByteArray(new byte[] {0x00, 0x00});
+          bMax = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 0x80});
           stats.setMinMax(bMin, bMax);
           ((Statistics<?>) stats).hasNonNullValue = false;
         } else {
           // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values 
would be skipped
-          if (min == Float16.POSITIVE_ZERO) {
-            bMin = 
Binary.fromConstantByteArray(Float16.NEGATIVE_ZERO_BYTES_LITTLE_ENDIAN);
+          if (min == (short) 0x0000) {
+            bMin = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 
0x80});

Review Comment:
   See above



##########
parquet-column/src/main/java/org/apache/parquet/schema/Float16.java:
##########
@@ -46,29 +46,10 @@
  * Ref: 
https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java
  */
 public class Float16 {

Review Comment:
   Please make any methods that are used only from the same package 
`package-private`. 



##########
parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java:
##########
@@ -150,26 +150,26 @@ public Float16Builder(PrimitiveType type) {
 
     @Override
     public Statistics<?> build() {
-      Float16Statistics stats = (Float16Statistics) super.build();
+      BinaryStatistics stats = (BinaryStatistics) super.build();
       if (stats.hasNonNullValue()) {
         Binary bMin = stats.genericGetMin();
         Binary bMax = stats.genericGetMax();
         short min = bMin.get2BytesLittleEndian();
         short max = bMax.get2BytesLittleEndian();
         // Drop min/max values in case of NaN as the sorting order of values 
is undefined for this case
         if (Float16.isNaN(min) || Float16.isNaN(max)) {
-          bMin = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
-          bMax = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
+          bMin = Binary.fromConstantByteArray(new byte[] {0x00, 0x00});
+          bMax = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 0x80});
           stats.setMinMax(bMin, bMax);
           ((Statistics<?>) stats).hasNonNullValue = false;
         } else {
           // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values 
would be skipped
-          if (min == Float16.POSITIVE_ZERO) {
-            bMin = 
Binary.fromConstantByteArray(Float16.NEGATIVE_ZERO_BYTES_LITTLE_ENDIAN);
+          if (min == (short) 0x0000) {
+            bMin = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 
0x80});
             stats.setMinMax(bMin, bMax);
           }
-          if (max == Float16.NEGATIVE_ZERO) {
-            bMax = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
+          if (max == (short) 0x8000) {
+            bMax = Binary.fromConstantByteArray(new byte[] {0x00, 0x00});

Review Comment:
   See above



##########
parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java:
##########
@@ -150,26 +150,26 @@ public Float16Builder(PrimitiveType type) {
 
     @Override
     public Statistics<?> build() {
-      Float16Statistics stats = (Float16Statistics) super.build();
+      BinaryStatistics stats = (BinaryStatistics) super.build();
       if (stats.hasNonNullValue()) {
         Binary bMin = stats.genericGetMin();
         Binary bMax = stats.genericGetMax();
         short min = bMin.get2BytesLittleEndian();
         short max = bMax.get2BytesLittleEndian();
         // Drop min/max values in case of NaN as the sorting order of values 
is undefined for this case
         if (Float16.isNaN(min) || Float16.isNaN(max)) {
-          bMin = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
-          bMax = 
Binary.fromConstantByteArray(Float16.POSITIVE_ZERO_BYTES_LITTLE_ENDIAN);
+          bMin = Binary.fromConstantByteArray(new byte[] {0x00, 0x00});
+          bMax = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 0x80});

Review Comment:
   I think, it would be better to use constants with the proper naming. You 
might even use constants of `Binary` directly.





> [Java] support for Arrow's float16
> ----------------------------------
>
>                 Key: PARQUET-1647
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1647
>             Project: Parquet
>          Issue Type: Improvement
>          Components: parquet-format, parquet-thrift
>            Reporter: The Alchemist
>            Priority: Minor
>
> h2. DESCRIPTION
>  
> I'm wondering if there's any interest in supporting Arrow's {{float16}} type 
> in Parquet.
> There seem to be one or two {{float16}} / {{halffloat}} tickets here (e.g., 
> PARQUET-1403) but nothing that speaks to adding half-float support to Parquet 
> in-general.
>  
> h2. PLANS
> I'm able to spend some time on this, if someone points me  in the right 
> direction.
>  
>  # Add the {{HALFFLOAT}} or {{FLOAT16}} enum (any preferred naming 
> convention?) to 
> [https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift#L32]
>  # Add {{HALFFLOAT}} to {{org.apache.parquet.schema.PrimitiveType}}
>  # Add {{HALFFLOAT}} support to 
> {{org.apache.parquet.arrow.schema.SchemaConverter}}
>  # Add encoding for new type at {{org.apache.parquet.column.Encoding}}
>  # ??
> If anyone has any interest in this, pointers, or comments, they would be 
> greatly appreciated!



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to