(tsfile) branch develop updated: Fix float encoder overflow when float value itself over int range (#412)

jiangtian Tue, 18 Feb 2025 03:06:30 -0800

This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git



The following commit(s) were added to refs/heads/develop by this push:
     new abfbfb8c Fix float encoder overflow when float value itself over int range (#412)
abfbfb8c is described below

commit abfbfb8c77d75108ac6547c45ab1d89d23c3eeec
Author: Haonan <[email protected]>
AuthorDate: Tue Feb 18 19:05:56 2025 +0800

    Fix float encoder overflow when float value itself over int range (#412)
    
    * Fix float encoder overflow when float value itself over int range
    
    * fix double
    
    * Keep NaN
---
 .github/workflows/code-coverage.yml                |  2 +-
 .../tsfile/encoding/decoder/FloatDecoder.java      | 29 ++++++--
 .../tsfile/encoding/encoder/FloatEncoder.java      | 83 ++++++++++++++++------
 .../tsfile/encoding/decoder/FloatDecoderTest.java  | 21 ++++--
 .../org/apache/tsfile/utils/EncodingUtils.java     | 41 -----------
 5 files changed, 101 insertions(+), 75 deletions(-)

diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml
index ef9524e1..93e24da7 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -29,7 +29,7 @@ jobs:
       - name: Generate code coverage reports
         run: |
           sudo apt-get install lcov
-          ./mvnw -B -P with-java,with-cpp,with-python,with-code-coverage clean verify
+          ./mvnw -B -P with-java,with-cpp,with-code-coverage clean verify
           lcov --capture --directory cpp/target/build/test --output-file cpp/target/build/test/coverage.info
           lcov --remove cpp/target/build/test/coverage.info '*/tsfile/cpp/test/*' --output-file cpp/target/build/test/coverage_filtered.info
           genhtml cpp/target/build/test/coverage_filtered.info --output-directory cpp/target/build/test/coverage_report
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
index 8f2510a2..56e44d3a 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -40,7 +40,7 @@ import java.nio.ByteBuffer;
 public class FloatDecoder extends Decoder {
 
   private static final Logger logger = LoggerFactory.getLogger(FloatDecoder.class);
-  private Decoder decoder;
+  private final Decoder decoder;
 
   /** maxPointValue = 10^(maxPointNumber). maxPointNumber can be read from the stream. */
   private double maxPointValue;
@@ -48,7 +48,8 @@ public class FloatDecoder extends Decoder {
   /** flag that indicates whether we have read maxPointNumber and calculated maxPointValue. */
   private boolean isMaxPointNumberRead;
 
-  private BitMap useMaxPointNumber;
+  private BitMap isUnderflowInfo;
+  private BitMap valueItselfOverflowInfo;
   private int position = 0;
 
   public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
@@ -97,6 +98,10 @@ public class FloatDecoder extends Decoder {
   public float readFloat(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     int value = decoder.readInt(buffer);
+    if (valueItselfOverflowInfo != null && valueItselfOverflowInfo.isMarked(position)) {
+      position++;
+      return Float.intBitsToFloat(value);
+    }
     double result = value / getMaxPointValue();
     position++;
     return (float) result;
@@ -106,16 +111,20 @@ public class FloatDecoder extends Decoder {
   public double readDouble(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     long value = decoder.readLong(buffer);
+    if (valueItselfOverflowInfo != null && valueItselfOverflowInfo.isMarked(position)) {
+      position++;
+      return Double.longBitsToDouble(value);
+    }
     double result = value / getMaxPointValue();
     position++;
     return result;
   }
 
   private double getMaxPointValue() {
-    if (useMaxPointNumber == null) {
+    if (isUnderflowInfo == null) {
       return maxPointValue;
     } else {
-      return useMaxPointNumber.isMarked(position) ? maxPointValue : 1;
+      return isUnderflowInfo.isMarked(position) ? maxPointValue : 1;
     }
   }
 
@@ -126,7 +135,17 @@ public class FloatDecoder extends Decoder {
         int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
         byte[] tmp = new byte[size / 8 + 1];
         buffer.get(tmp, 0, size / 8 + 1);
-        useMaxPointNumber = new BitMap(size, tmp);
+        isUnderflowInfo = new BitMap(size, tmp);
+        maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        maxPointValue = Math.pow(10, maxPointNumber);
+      } else if (maxPointNumber == Integer.MAX_VALUE - 1) {
+        int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        byte[] tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        isUnderflowInfo = new BitMap(size, tmp);
+        tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        valueItselfOverflowInfo = new BitMap(size, tmp);
         maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
         maxPointValue = Math.pow(10, maxPointNumber);
       } else if (maxPointNumber <= 0) {
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
index dccb2a93..75d7b674 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -41,7 +41,7 @@ import java.util.List;
  */
 public class FloatEncoder extends Encoder {
 
-  private Encoder encoder;
+  private final Encoder encoder;
 
   /** number for accuracy of decimal places. */
   private int maxPointNumber;
@@ -52,14 +52,17 @@ public class FloatEncoder extends Encoder {
   /** flag to check whether maxPointNumber is saved in the stream. */
   private boolean isMaxPointNumberSaved;
 
-  private final List<Boolean> useMaxPointNumber;
+  // value * maxPointValue does not overflow -> true
+  // value * maxPointValue overflows, but value itself is in range -> false
+  // value itself overflows, or value is NaN (raw bits are stored) -> null
+  private final List<Boolean> underflowFlags;
 
   public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int maxPointNumber) {
     super(encodingType);
     this.maxPointNumber = maxPointNumber;
     calculateMaxPointNum();
     isMaxPointNumberSaved = false;
-    useMaxPointNumber = new ArrayList<>();
+    underflowFlags = new ArrayList<>();
     if (encodingType == TSEncoding.RLE) {
       if (dataType == TSDataType.FLOAT) {
         encoder = new IntRleEncoder();
@@ -118,39 +121,73 @@ public class FloatEncoder extends Encoder {
 
   private int convertFloatToInt(float value) {
     if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue < Integer.MIN_VALUE) {
-      useMaxPointNumber.add(false);
-      return Math.round(value);
+      if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) {
+        underflowFlags.add(null);
+        return Float.floatToIntBits(value);
+      } else {
+        underflowFlags.add(false);
+        return Math.round(value);
+      }
     } else {
-      useMaxPointNumber.add(true);
-      return (int) Math.round(value * maxPointValue);
+      if (Float.isNaN(value)) {
+        underflowFlags.add(null);
+        return Float.floatToIntBits(value);
+      } else {
+        underflowFlags.add(true);
+        return (int) Math.round(value * maxPointValue);
+      }
     }
   }
 
   private long convertDoubleToLong(double value) {
     if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue < Long.MIN_VALUE) {
-      useMaxPointNumber.add(false);
-      return Math.round(value);
+      if (value > Long.MAX_VALUE || value < Long.MIN_VALUE) {
+        underflowFlags.add(null);
+        return Double.doubleToLongBits(value);
+      } else {
+        underflowFlags.add(false);
+        return Math.round(value);
+      }
     } else {
-      useMaxPointNumber.add(true);
-      return Math.round(value * maxPointValue);
+      if (Double.isNaN(value)) {
+        underflowFlags.add(null);
+        return Double.doubleToLongBits(value);
+      } else {
+        underflowFlags.add(true);
+        return Math.round(value * maxPointValue);
+      }
     }
   }
 
   @Override
   public void flush(ByteArrayOutputStream out) throws IOException {
     encoder.flush(out);
-    if (pointsNotUseMaxPointNumber()) {
+    if (hasOverflow()) {
       byte[] ba = out.toByteArray();
       out.reset();
-      ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
-      BitMap bitMap = new BitMap(useMaxPointNumber.size());
-      for (int i = 0; i < useMaxPointNumber.size(); i++) {
-        if (useMaxPointNumber.get(i)) {
-          bitMap.mark(i);
+      BitMap bitMapOfValueItselfOverflowInfo = null;
+      BitMap bitMapOfUnderflowInfo = new BitMap(underflowFlags.size());
+      for (int i = 0; i < underflowFlags.size(); i++) {
+        if (underflowFlags.get(i) == null) {
+          if (bitMapOfValueItselfOverflowInfo == null) {
+            bitMapOfValueItselfOverflowInfo = new BitMap(underflowFlags.size());
+          }
+          bitMapOfValueItselfOverflowInfo.mark(i);
+        } else if (underflowFlags.get(i)) {
+          bitMapOfUnderflowInfo.mark(i);
         }
       }
-      ReadWriteForEncodingUtils.writeUnsignedVarInt(useMaxPointNumber.size(), out);
-      out.write(bitMap.getByteArray());
+      if (bitMapOfValueItselfOverflowInfo != null) {
+        // marker: at least one value is stored as raw bits, so a second bitmap is written below
+        ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE - 1, out);
+      } else {
+        ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+      }
+      ReadWriteForEncodingUtils.writeUnsignedVarInt(underflowFlags.size(), out);
+      out.write(bitMapOfUnderflowInfo.getByteArray());
+      if (bitMapOfValueItselfOverflowInfo != null) {
+        out.write(bitMapOfValueItselfOverflowInfo.getByteArray());
+      }
       out.write(ba);
     }
     reset();
@@ -158,12 +195,12 @@ public class FloatEncoder extends Encoder {
 
   private void reset() {
     isMaxPointNumberSaved = false;
-    useMaxPointNumber.clear();
+    underflowFlags.clear();
   }
 
-  private boolean pointsNotUseMaxPointNumber() {
-    for (boolean info : useMaxPointNumber) {
-      if (!info) {
+  private Boolean hasOverflow() {
+    for (Boolean flag : underflowFlags) {
+      if (flag == null || !flag) {
         return true;
       }
     }
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
index 417a4e0b..6f56b584 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -34,7 +34,6 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.apache.tsfile.utils.EncodingUtils.roundWithGivenPrecision;
 import static org.junit.Assert.assertEquals;
 
 public class FloatDecoderTest {
@@ -208,32 +207,44 @@ public class FloatDecoderTest {
   public void testBigFloat() throws Exception {
     float a = 0.333F;
     float b = 6.5536403E8F;
+    float c = 3.123456768E20F;
+    float d = Float.NaN;
     Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 2);
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
     encoder.encode(a, baos);
     encoder.encode(b, baos);
+    encoder.encode(c, baos);
+    encoder.encode(d, baos);
     encoder.flush(baos);
 
     ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
     Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
-    assertEquals(roundWithGivenPrecision(a, 2), decoder.readFloat(buffer), delta);
-    assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer), delta);
+    assertEquals(a, decoder.readFloat(buffer), 0.01);
+    assertEquals(b, decoder.readFloat(buffer), 0.01);
+    assertEquals(c, decoder.readFloat(buffer), 0.01);
+    assertEquals(d, decoder.readFloat(buffer), 0.01);
   }
 
   @Test
   public void testBigDouble() throws Exception {
     double a = 0.333;
     double b = 9.223372036854E18;
+    double c = 9.223372036854E100;
+    double d = Double.NaN;
     Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
     encoder.encode(a, baos);
     encoder.encode(b, baos);
+    encoder.encode(c, baos);
+    encoder.encode(d, baos);
     encoder.flush(baos);
 
     ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
     Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
-    assertEquals(roundWithGivenPrecision(a, 2), decoder.readDouble(buffer), delta);
-    assertEquals(roundWithGivenPrecision(b, 2), decoder.readDouble(buffer), delta);
+    assertEquals(a, decoder.readDouble(buffer), 0.01);
+    assertEquals(b, decoder.readDouble(buffer), 0.01);
+    assertEquals(c, decoder.readDouble(buffer), 0.01);
+    assertEquals(d, decoder.readDouble(buffer), 0.01);
   }
 
   // private void testDecimalLenght(TSEncoding encoding, List<Double> valueList,
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
deleted file mode 100644
index 32dd29e4..00000000
--- a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.tsfile.utils;
-
-public class EncodingUtils {
-
-  // Copied from org.apache.iotdb.db.utils.MathUtils
-  public static float roundWithGivenPrecision(float data, int size) {
-    if (size == 0) {
-      return Math.round(data);
-    }
-    return Math.round(data)
-        + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / (float) Math.pow(10, size);
-  }
-
-  // Copied from org.apache.iotdb.db.utils.MathUtils
-  public static double roundWithGivenPrecision(double data, int size) {
-    if (size == 0) {
-      return Math.round(data);
-    }
-    return Math.round(data)
-        + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / Math.pow(10, size);
-  }
-}

(tsfile) branch develop updated: Fix float encoder overflow when float value itself over int range (#412)

Reply via email to