(tsfile) branch dev/1.1 updated: Fix float encoder overflow when float value itself over int range (#412)

jiangtian Tue, 18 Feb 2025 17:25:38 -0800

This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch dev/1.1
in repository https://gitbox.apache.org/repos/asf/tsfile.git



The following commit(s) were added to refs/heads/dev/1.1 by this push:
     new 99eef393 Fix float encoder overflow when float value itself over int range (#412)
99eef393 is described below

commit 99eef39313f6b44d746984513327547f04f5954f
Author: Haonan <[email protected]>
AuthorDate: Tue Feb 18 19:05:56 2025 +0800

    Fix float encoder overflow when float value itself over int range (#412)
---
 .github/workflows/code-coverage.yml                |  2 +-
 .../tsfile/encoding/decoder/FloatDecoder.java      | 50 +++++++++++-
 .../tsfile/encoding/encoder/FloatEncoder.java      | 89 ++++++++++++++++++++--
 .../tsfile/encoding/decoder/FloatDecoderTest.java  | 44 +++++++++++
 4 files changed, 175 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml
index 5933b410..9a660dab 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -31,7 +31,7 @@ jobs:
       - name: Generate code coverage reports
         run: |
           sudo apt-get install lcov
-          ./mvnw -B -P with-java,with-cpp,with-python,with-code-coverage clean verify
+          ./mvnw -B -P with-java,with-cpp,with-code-coverage clean verify
           lcov --capture --directory cpp/target/build/test --output-file cpp/target/build/test/coverage.info
           lcov --remove cpp/target/build/test/coverage.info '*/tsfile/cpp/test/*' --output-file cpp/target/build/test/coverage_filtered.info
           genhtml cpp/target/build/test/coverage_filtered.info --output-directory cpp/target/build/test/coverage_report
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
index 6b019930..56e44d3a 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -24,6 +24,7 @@ import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.exception.encoding.TsFileDecodingException;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.BitMap;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 
 import org.slf4j.Logger;
@@ -39,7 +40,7 @@ import java.nio.ByteBuffer;
 public class FloatDecoder extends Decoder {
 
   private static final Logger logger = 
LoggerFactory.getLogger(FloatDecoder.class);
-  private Decoder decoder;
+  private final Decoder decoder;
 
   /** maxPointValue = 10^(maxPointNumber). maxPointNumber can be read from the 
stream. */
   private double maxPointValue;
@@ -47,6 +48,10 @@ public class FloatDecoder extends Decoder {
   /** flag that indicates whether we have read maxPointNumber and calculated 
maxPointValue. */
   private boolean isMaxPointNumberRead;
 
+  private BitMap isUnderflowInfo;
+  private BitMap valueItselfOverflowInfo;
+  private int position = 0;
+
   public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
     super(encodingType);
     if (encodingType == TSEncoding.RLE) {
@@ -93,7 +98,12 @@ public class FloatDecoder extends Decoder {
   public float readFloat(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     int value = decoder.readInt(buffer);
-    double result = value / maxPointValue;
+    if (valueItselfOverflowInfo != null && 
valueItselfOverflowInfo.isMarked(position)) {
+      position++;
+      return Float.intBitsToFloat(value);
+    }
+    double result = value / getMaxPointValue();
+    position++;
     return (float) result;
   }
 
@@ -101,13 +111,44 @@ public class FloatDecoder extends Decoder {
   public double readDouble(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     long value = decoder.readLong(buffer);
-    return value / maxPointValue;
+    if (valueItselfOverflowInfo != null && 
valueItselfOverflowInfo.isMarked(position)) {
+      position++;
+      return Double.longBitsToDouble(value);
+    }
+    double result = value / getMaxPointValue();
+    position++;
+    return result;
+  }
+
+  private double getMaxPointValue() {
+    if (isUnderflowInfo == null) {
+      return maxPointValue;
+    } else {
+      return isUnderflowInfo.isMarked(position) ? maxPointValue : 1;
+    }
   }
 
   private void readMaxPointValue(ByteBuffer buffer) {
     if (!isMaxPointNumberRead) {
       int maxPointNumber = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-      if (maxPointNumber <= 0) {
+      if (maxPointNumber == Integer.MAX_VALUE) {
+        int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        byte[] tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        isUnderflowInfo = new BitMap(size, tmp);
+        maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        maxPointValue = Math.pow(10, maxPointNumber);
+      } else if (maxPointNumber == Integer.MAX_VALUE - 1) {
+        int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        byte[] tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        isUnderflowInfo = new BitMap(size, tmp);
+        tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        valueItselfOverflowInfo = new BitMap(size, tmp);
+        maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        maxPointValue = Math.pow(10, maxPointNumber);
+      } else if (maxPointNumber <= 0) {
         maxPointValue = 1;
       } else {
         maxPointValue = Math.pow(10, maxPointNumber);
@@ -153,5 +194,6 @@ public class FloatDecoder extends Decoder {
   public void reset() {
     this.decoder.reset();
     this.isMaxPointNumberRead = false;
+    this.position = 0;
   }
 }
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
index adf328e1..75d7b674 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -22,10 +22,13 @@ package org.apache.tsfile.encoding.encoder;
 import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.exception.encoding.TsFileEncodingException;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.utils.BitMap;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * Encoder for float or double value using rle or two-diff according to 
following grammar.
@@ -38,7 +41,7 @@ import java.io.IOException;
  */
 public class FloatEncoder extends Encoder {
 
-  private Encoder encoder;
+  private final Encoder encoder;
 
   /** number for accuracy of decimal places. */
   private int maxPointNumber;
@@ -49,11 +52,17 @@ public class FloatEncoder extends Encoder {
   /** flag to check whether maxPointNumber is saved in the stream. */
   private boolean isMaxPointNumberSaved;
 
+  // value * maxPointValue not overflow -> True
+  // value * maxPointValue overflow -> False
+  // value itself overflow -> null
+  private final List<Boolean> underflowFlags;
+
   public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int 
maxPointNumber) {
     super(encodingType);
     this.maxPointNumber = maxPointNumber;
-    calculateMaxPonitNum();
+    calculateMaxPointNum();
     isMaxPointNumberSaved = false;
+    underflowFlags = new ArrayList<>();
     if (encodingType == TSEncoding.RLE) {
       if (dataType == TSDataType.FLOAT) {
         encoder = new IntRleEncoder();
@@ -101,7 +110,7 @@ public class FloatEncoder extends Encoder {
     encoder.encode(valueLong, out);
   }
 
-  private void calculateMaxPonitNum() {
+  private void calculateMaxPointNum() {
     if (maxPointNumber <= 0) {
       maxPointNumber = 0;
       maxPointValue = 1;
@@ -111,21 +120,91 @@ public class FloatEncoder extends Encoder {
   }
 
   private int convertFloatToInt(float value) {
-    return (int) Math.round(value * maxPointValue);
+    if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue < 
Integer.MIN_VALUE) {
+      if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) {
+        underflowFlags.add(null);
+        return Float.floatToIntBits(value);
+      } else {
+        underflowFlags.add(false);
+        return Math.round(value);
+      }
+    } else {
+      if (Float.isNaN(value)) {
+        underflowFlags.add(null);
+        return Float.floatToIntBits(value);
+      } else {
+        underflowFlags.add(true);
+        return (int) Math.round(value * maxPointValue);
+      }
+    }
   }
 
   private long convertDoubleToLong(double value) {
-    return Math.round(value * maxPointValue);
+    if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue < 
Long.MIN_VALUE) {
+      if (value > Long.MAX_VALUE || value < Long.MIN_VALUE) {
+        underflowFlags.add(null);
+        return Double.doubleToLongBits(value);
+      } else {
+        underflowFlags.add(false);
+        return Math.round(value);
+      }
+    } else {
+      if (Double.isNaN(value)) {
+        underflowFlags.add(null);
+        return Double.doubleToLongBits(value);
+      } else {
+        underflowFlags.add(true);
+        return Math.round(value * maxPointValue);
+      }
+    }
   }
 
   @Override
   public void flush(ByteArrayOutputStream out) throws IOException {
     encoder.flush(out);
+    if (hasOverflow()) {
+      byte[] ba = out.toByteArray();
+      out.reset();
+      BitMap bitMapOfValueItselfOverflowInfo = null;
+      BitMap bitMapOfUnderflowInfo = new BitMap(underflowFlags.size());
+      for (int i = 0; i < underflowFlags.size(); i++) {
+        if (underflowFlags.get(i) == null) {
+          if (bitMapOfValueItselfOverflowInfo == null) {
+            bitMapOfValueItselfOverflowInfo = new 
BitMap(underflowFlags.size());
+          }
+          bitMapOfValueItselfOverflowInfo.mark(i);
+        } else if (underflowFlags.get(i)) {
+          bitMapOfUnderflowInfo.mark(i);
+        }
+      }
+      if (bitMapOfValueItselfOverflowInfo != null) {
+        // flag of value itself contains
+        ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE - 1, 
out);
+      } else {
+        ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+      }
+      ReadWriteForEncodingUtils.writeUnsignedVarInt(underflowFlags.size(), 
out);
+      out.write(bitMapOfUnderflowInfo.getByteArray());
+      if (bitMapOfValueItselfOverflowInfo != null) {
+        out.write(bitMapOfValueItselfOverflowInfo.getByteArray());
+      }
+      out.write(ba);
+    }
     reset();
   }
 
   private void reset() {
     isMaxPointNumberSaved = false;
+    underflowFlags.clear();
+  }
+
+  private Boolean hasOverflow() {
+    for (Boolean flag : underflowFlags) {
+      if (flag == null || !flag) {
+        return true;
+      }
+    }
+    return false;
   }
 
   private void saveMaxPointNumber(ByteArrayOutputStream out) {
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
index bdc1db9f..6f56b584 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -203,6 +203,50 @@ public class FloatDecoderTest {
     }
   }
 
+  @Test
+  public void testBigFloat() throws Exception {
+    float a = 0.333F;
+    float b = 6.5536403E8F;
+    float c = 3.123456768E20F;
+    float d = Float.NaN;
+    Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 
2);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    encoder.encode(a, baos);
+    encoder.encode(b, baos);
+    encoder.encode(c, baos);
+    encoder.encode(d, baos);
+    encoder.flush(baos);
+
+    ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+    Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
+    assertEquals(a, decoder.readFloat(buffer), 0.01);
+    assertEquals(b, decoder.readFloat(buffer), 0.01);
+    assertEquals(c, decoder.readFloat(buffer), 0.01);
+    assertEquals(d, decoder.readFloat(buffer), 0.01);
+  }
+
+  @Test
+  public void testBigDouble() throws Exception {
+    double a = 0.333;
+    double b = 9.223372036854E18;
+    double c = 9.223372036854E100;
+    double d = Double.NaN;
+    Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    encoder.encode(a, baos);
+    encoder.encode(b, baos);
+    encoder.encode(c, baos);
+    encoder.encode(d, baos);
+    encoder.flush(baos);
+
+    ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+    Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
+    assertEquals(a, decoder.readDouble(buffer), 0.01);
+    assertEquals(b, decoder.readDouble(buffer), 0.01);
+    assertEquals(c, decoder.readDouble(buffer), 0.01);
+    assertEquals(d, decoder.readDouble(buffer), 0.01);
+  }
+
   // private void testDecimalLenght(TSEncoding encoding, List<Double> 
valueList,
   // int maxPointValue,
   // boolean isDebug, int repeatCount) throws Exception {

(tsfile) branch dev/1.1 updated: Fix float encoder overflow when float value itself over int range (#412)

Reply via email to