(tsfile) branch develop updated: Fix float encoder overflow (#342)

jiangtian Tue, 24 Dec 2024 20:11:26 -0800

This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git



The following commit(s) were added to refs/heads/develop by this push:
     new 654e5091 Fix float encoder overflow (#342)
654e5091 is described below

commit 654e50915eb148f57cdaf38aa8de5e22d564db2f
Author: Haonan <[email protected]>
AuthorDate: Wed Dec 25 12:11:05 2024 +0800

    Fix float encoder overflow (#342)
    
    * try one try
    
    * Fix FloatEncoder overflow problem
    
    * add more UT
    
    * add more UT
---
 .../tsfile/encoding/decoder/FloatDecoder.java      | 29 +++++++++++--
 .../tsfile/encoding/encoder/FloatEncoder.java      | 50 ++++++++++++++++++++--
 .../tsfile/encoding/decoder/FloatDecoderTest.java  | 33 ++++++++++++++
 .../org/apache/tsfile/utils/EncodingUtils.java     | 41 ++++++++++++++++++
 .../apache/tsfile/write/TsFileReadWriteTest.java   |  9 +++-
 5 files changed, 153 insertions(+), 9 deletions(-)

diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
index 6b019930..8f2510a2 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -24,6 +24,7 @@ import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.exception.encoding.TsFileDecodingException;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.BitMap;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 
 import org.slf4j.Logger;
@@ -47,6 +48,9 @@ public class FloatDecoder extends Decoder {
   /** flag that indicates whether we have read maxPointNumber and calculated 
maxPointValue. */
   private boolean isMaxPointNumberRead;
 
+  private BitMap useMaxPointNumber;
+  private int position = 0;
+
   public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
     super(encodingType);
     if (encodingType == TSEncoding.RLE) {
@@ -93,7 +97,8 @@ public class FloatDecoder extends Decoder {
   public float readFloat(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     int value = decoder.readInt(buffer);
-    double result = value / maxPointValue;
+    double result = value / getMaxPointValue();
+    position++;
     return (float) result;
   }
 
@@ -101,13 +106,30 @@ public class FloatDecoder extends Decoder {
   public double readDouble(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     long value = decoder.readLong(buffer);
-    return value / maxPointValue;
+    double result = value / getMaxPointValue();
+    position++;
+    return result;
+  }
+
+  private double getMaxPointValue() {
+    if (useMaxPointNumber == null) {
+      return maxPointValue;
+    } else {
+      return useMaxPointNumber.isMarked(position) ? maxPointValue : 1;
+    }
   }
 
   private void readMaxPointValue(ByteBuffer buffer) {
     if (!isMaxPointNumberRead) {
       int maxPointNumber = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-      if (maxPointNumber <= 0) {
+      if (maxPointNumber == Integer.MAX_VALUE) {
+        int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        byte[] tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        useMaxPointNumber = new BitMap(size, tmp);
+        maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        maxPointValue = Math.pow(10, maxPointNumber);
+      } else if (maxPointNumber <= 0) {
         maxPointValue = 1;
       } else {
         maxPointValue = Math.pow(10, maxPointNumber);
@@ -153,5 +175,6 @@ public class FloatDecoder extends Decoder {
   public void reset() {
     this.decoder.reset();
     this.isMaxPointNumberRead = false;
+    this.position = 0;
   }
 }
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
index adf328e1..dccb2a93 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -22,10 +22,13 @@ package org.apache.tsfile.encoding.encoder;
 import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.exception.encoding.TsFileEncodingException;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.utils.BitMap;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * Encoder for float or double value using rle or two-diff according to 
following grammar.
@@ -49,11 +52,14 @@ public class FloatEncoder extends Encoder {
   /** flag to check whether maxPointNumber is saved in the stream. */
   private boolean isMaxPointNumberSaved;
 
+  private final List<Boolean> useMaxPointNumber;
+
   public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int 
maxPointNumber) {
     super(encodingType);
     this.maxPointNumber = maxPointNumber;
-    calculateMaxPonitNum();
+    calculateMaxPointNum();
     isMaxPointNumberSaved = false;
+    useMaxPointNumber = new ArrayList<>();
     if (encodingType == TSEncoding.RLE) {
       if (dataType == TSDataType.FLOAT) {
         encoder = new IntRleEncoder();
@@ -101,7 +107,7 @@ public class FloatEncoder extends Encoder {
     encoder.encode(valueLong, out);
   }
 
-  private void calculateMaxPonitNum() {
+  private void calculateMaxPointNum() {
     if (maxPointNumber <= 0) {
       maxPointNumber = 0;
       maxPointValue = 1;
@@ -111,21 +117,57 @@ public class FloatEncoder extends Encoder {
   }
 
   private int convertFloatToInt(float value) {
-    return (int) Math.round(value * maxPointValue);
+    if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue < 
Integer.MIN_VALUE) {
+      useMaxPointNumber.add(false);
+      return Math.round(value);
+    } else {
+      useMaxPointNumber.add(true);
+      return (int) Math.round(value * maxPointValue);
+    }
   }
 
   private long convertDoubleToLong(double value) {
-    return Math.round(value * maxPointValue);
+    if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue < 
Long.MIN_VALUE) {
+      useMaxPointNumber.add(false);
+      return Math.round(value);
+    } else {
+      useMaxPointNumber.add(true);
+      return Math.round(value * maxPointValue);
+    }
   }
 
   @Override
   public void flush(ByteArrayOutputStream out) throws IOException {
     encoder.flush(out);
+    if (pointsNotUseMaxPointNumber()) {
+      byte[] ba = out.toByteArray();
+      out.reset();
+      ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+      BitMap bitMap = new BitMap(useMaxPointNumber.size());
+      for (int i = 0; i < useMaxPointNumber.size(); i++) {
+        if (useMaxPointNumber.get(i)) {
+          bitMap.mark(i);
+        }
+      }
+      ReadWriteForEncodingUtils.writeUnsignedVarInt(useMaxPointNumber.size(), 
out);
+      out.write(bitMap.getByteArray());
+      out.write(ba);
+    }
     reset();
   }
 
   private void reset() {
     isMaxPointNumberSaved = false;
+    useMaxPointNumber.clear();
+  }
+
+  private boolean pointsNotUseMaxPointNumber() {
+    for (boolean info : useMaxPointNumber) {
+      if (!info) {
+        return true;
+      }
+    }
+    return false;
   }
 
   private void saveMaxPointNumber(ByteArrayOutputStream out) {
diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
 
b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
index bdc1db9f..417a4e0b 100644
--- 
a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
+++ 
b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -34,6 +34,7 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.apache.tsfile.utils.EncodingUtils.roundWithGivenPrecision;
 import static org.junit.Assert.assertEquals;
 
 public class FloatDecoderTest {
@@ -203,6 +204,38 @@ public class FloatDecoderTest {
     }
   }
 
+  @Test
+  public void testBigFloat() throws Exception {
+    float a = 0.333F;
+    float b = 6.5536403E8F;
+    Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 
2);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    encoder.encode(a, baos);
+    encoder.encode(b, baos);
+    encoder.flush(baos);
+
+    ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+    Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
+    assertEquals(roundWithGivenPrecision(a, 2), decoder.readFloat(buffer), 
delta);
+    assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer), 
delta);
+  }
+
+  @Test
+  public void testBigDouble() throws Exception {
+    double a = 0.333;
+    double b = 9.223372036854E18;
+    Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    encoder.encode(a, baos);
+    encoder.encode(b, baos);
+    encoder.flush(baos);
+
+    ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+    Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
+    assertEquals(roundWithGivenPrecision(a, 2), decoder.readDouble(buffer), 
delta);
+    assertEquals(roundWithGivenPrecision(b, 2), decoder.readDouble(buffer), 
delta);
+  }
+
   // private void testDecimalLenght(TSEncoding encoding, List<Double> 
valueList,
   // int maxPointValue,
   // boolean isDebug, int repeatCount) throws Exception {
diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java 
b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
new file mode 100644
index 00000000..32dd29e4
--- /dev/null
+++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.utils;
+
+public class EncodingUtils {
+
+  // Copied from org.apache.iotdb.db.utils.MathUtils
+  public static float roundWithGivenPrecision(float data, int size) {
+    if (size == 0) {
+      return Math.round(data);
+    }
+    return Math.round(data)
+        + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / 
(float) Math.pow(10, size);
+  }
+
+  // Copied from org.apache.iotdb.db.utils.MathUtils
+  public static double roundWithGivenPrecision(double data, int size) {
+    if (size == 0) {
+      return Math.round(data);
+    }
+    return Math.round(data)
+        + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / 
Math.pow(10, size);
+  }
+}
diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java 
b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
index d9add8de..7f74d162 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
@@ -134,8 +134,13 @@ public class TsFileReadWriteTest {
 
   public void floatTest(TSEncoding encoding) throws IOException, 
WriteProcessException {
     writeDataByTSRecord(
-        TSDataType.FLOAT, (i) -> new FloatDataPoint("sensor_1", (float) i), 
encoding);
-    readData((i, field, delta) -> assertEquals(i, field.getFloatV(), delta));
+        TSDataType.FLOAT,
+        (i) -> new FloatDataPoint("sensor_1", i % 2 == 0 ? 6.55364032E8F : i),
+        encoding);
+    readData(
+        (i, field, delta) ->
+            assertEquals(
+                encoding.toString(), i % 2 == 0 ? 6.55364032E8F : i, 
field.getFloatV(), delta));
   }
 
   @Test

(tsfile) branch develop updated: Fix float encoder overflow (#342)

Reply via email to