This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch rc/2.0.2
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/rc/2.0.2 by this push:
new 45fa5a8f Fix float encoder overflow when float value itself over int
range (#412)
45fa5a8f is described below
commit 45fa5a8f535c957933dc00019930c586ab91614a
Author: Haonan <[email protected]>
AuthorDate: Tue Feb 18 19:05:56 2025 +0800
Fix float encoder overflow when float value itself over int range (#412)
* Fix float encoder overflow when float value itself over int range
* fix double
* Keep NaN
(cherry picked from commit abfbfb8c77d75108ac6547c45ab1d89d23c3eeec)
---
.github/workflows/code-coverage.yml | 2 +-
.../tsfile/encoding/decoder/FloatDecoder.java | 29 ++++++--
.../tsfile/encoding/encoder/FloatEncoder.java | 83 ++++++++++++++++------
.../tsfile/encoding/decoder/FloatDecoderTest.java | 21 ++++--
.../org/apache/tsfile/utils/EncodingUtils.java | 41 -----------
5 files changed, 101 insertions(+), 75 deletions(-)
diff --git a/.github/workflows/code-coverage.yml
b/.github/workflows/code-coverage.yml
index ef9524e1..93e24da7 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -29,7 +29,7 @@ jobs:
- name: Generate code coverage reports
run: |
sudo apt-get install lcov
- ./mvnw -B -P with-java,with-cpp,with-python,with-code-coverage clean
verify
+ ./mvnw -B -P with-java,with-cpp,with-code-coverage clean verify
lcov --capture --directory cpp/target/build/test --output-file
cpp/target/build/test/coverage.info
lcov --remove cpp/target/build/test/coverage.info
'*/tsfile/cpp/test/*' --output-file cpp/target/build/test/coverage_filtered.info
genhtml cpp/target/build/test/coverage_filtered.info
--output-directory cpp/target/build/test/coverage_report
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
index 8f2510a2..56e44d3a 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -40,7 +40,7 @@ import java.nio.ByteBuffer;
public class FloatDecoder extends Decoder {
private static final Logger logger =
LoggerFactory.getLogger(FloatDecoder.class);
- private Decoder decoder;
+ private final Decoder decoder;
/** maxPointValue = 10^(maxPointNumber). maxPointNumber can be read from the
stream. */
private double maxPointValue;
@@ -48,7 +48,8 @@ public class FloatDecoder extends Decoder {
/** flag that indicates whether we have read maxPointNumber and calculated
maxPointValue. */
private boolean isMaxPointNumberRead;
- private BitMap useMaxPointNumber;
+ private BitMap isUnderflowInfo;
+ private BitMap valueItselfOverflowInfo;
private int position = 0;
public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
@@ -97,6 +98,10 @@ public class FloatDecoder extends Decoder {
public float readFloat(ByteBuffer buffer) {
readMaxPointValue(buffer);
int value = decoder.readInt(buffer);
+ if (valueItselfOverflowInfo != null &&
valueItselfOverflowInfo.isMarked(position)) {
+ position++;
+ return Float.intBitsToFloat(value);
+ }
double result = value / getMaxPointValue();
position++;
return (float) result;
@@ -106,16 +111,20 @@ public class FloatDecoder extends Decoder {
public double readDouble(ByteBuffer buffer) {
readMaxPointValue(buffer);
long value = decoder.readLong(buffer);
+ if (valueItselfOverflowInfo != null &&
valueItselfOverflowInfo.isMarked(position)) {
+ position++;
+ return Double.longBitsToDouble(value);
+ }
double result = value / getMaxPointValue();
position++;
return result;
}
private double getMaxPointValue() {
- if (useMaxPointNumber == null) {
+ if (isUnderflowInfo == null) {
return maxPointValue;
} else {
- return useMaxPointNumber.isMarked(position) ? maxPointValue : 1;
+ return isUnderflowInfo.isMarked(position) ? maxPointValue : 1;
}
}
@@ -126,7 +135,17 @@ public class FloatDecoder extends Decoder {
int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
byte[] tmp = new byte[size / 8 + 1];
buffer.get(tmp, 0, size / 8 + 1);
- useMaxPointNumber = new BitMap(size, tmp);
+ isUnderflowInfo = new BitMap(size, tmp);
+ maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ maxPointValue = Math.pow(10, maxPointNumber);
+ } else if (maxPointNumber == Integer.MAX_VALUE - 1) {
+ int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ byte[] tmp = new byte[size / 8 + 1];
+ buffer.get(tmp, 0, size / 8 + 1);
+ isUnderflowInfo = new BitMap(size, tmp);
+ tmp = new byte[size / 8 + 1];
+ buffer.get(tmp, 0, size / 8 + 1);
+ valueItselfOverflowInfo = new BitMap(size, tmp);
maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
maxPointValue = Math.pow(10, maxPointNumber);
} else if (maxPointNumber <= 0) {
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
index dccb2a93..75d7b674 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -41,7 +41,7 @@ import java.util.List;
*/
public class FloatEncoder extends Encoder {
- private Encoder encoder;
+ private final Encoder encoder;
/** number for accuracy of decimal places. */
private int maxPointNumber;
@@ -52,14 +52,17 @@ public class FloatEncoder extends Encoder {
/** flag to check whether maxPointNumber is saved in the stream. */
private boolean isMaxPointNumberSaved;
- private final List<Boolean> useMaxPointNumber;
+ // value * maxPointValue not overflow -> True
+ // value * maxPointValue overflow -> False
+ // value itself overflow -> null
+ private final List<Boolean> underflowFlags;
public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int
maxPointNumber) {
super(encodingType);
this.maxPointNumber = maxPointNumber;
calculateMaxPointNum();
isMaxPointNumberSaved = false;
- useMaxPointNumber = new ArrayList<>();
+ underflowFlags = new ArrayList<>();
if (encodingType == TSEncoding.RLE) {
if (dataType == TSDataType.FLOAT) {
encoder = new IntRleEncoder();
@@ -118,39 +121,73 @@ public class FloatEncoder extends Encoder {
private int convertFloatToInt(float value) {
if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue <
Integer.MIN_VALUE) {
- useMaxPointNumber.add(false);
- return Math.round(value);
+ if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) {
+ underflowFlags.add(null);
+ return Float.floatToIntBits(value);
+ } else {
+ underflowFlags.add(false);
+ return Math.round(value);
+ }
} else {
- useMaxPointNumber.add(true);
- return (int) Math.round(value * maxPointValue);
+ if (Float.isNaN(value)) {
+ underflowFlags.add(null);
+ return Float.floatToIntBits(value);
+ } else {
+ underflowFlags.add(true);
+ return (int) Math.round(value * maxPointValue);
+ }
}
}
private long convertDoubleToLong(double value) {
if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue <
Long.MIN_VALUE) {
- useMaxPointNumber.add(false);
- return Math.round(value);
+ if (value > Long.MAX_VALUE || value < Long.MIN_VALUE) {
+ underflowFlags.add(null);
+ return Double.doubleToLongBits(value);
+ } else {
+ underflowFlags.add(false);
+ return Math.round(value);
+ }
} else {
- useMaxPointNumber.add(true);
- return Math.round(value * maxPointValue);
+ if (Double.isNaN(value)) {
+ underflowFlags.add(null);
+ return Double.doubleToLongBits(value);
+ } else {
+ underflowFlags.add(true);
+ return Math.round(value * maxPointValue);
+ }
}
}
@Override
public void flush(ByteArrayOutputStream out) throws IOException {
encoder.flush(out);
- if (pointsNotUseMaxPointNumber()) {
+ if (hasOverflow()) {
byte[] ba = out.toByteArray();
out.reset();
- ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
- BitMap bitMap = new BitMap(useMaxPointNumber.size());
- for (int i = 0; i < useMaxPointNumber.size(); i++) {
- if (useMaxPointNumber.get(i)) {
- bitMap.mark(i);
+ BitMap bitMapOfValueItselfOverflowInfo = null;
+ BitMap bitMapOfUnderflowInfo = new BitMap(underflowFlags.size());
+ for (int i = 0; i < underflowFlags.size(); i++) {
+ if (underflowFlags.get(i) == null) {
+ if (bitMapOfValueItselfOverflowInfo == null) {
+ bitMapOfValueItselfOverflowInfo = new
BitMap(underflowFlags.size());
+ }
+ bitMapOfValueItselfOverflowInfo.mark(i);
+ } else if (underflowFlags.get(i)) {
+ bitMapOfUnderflowInfo.mark(i);
}
}
- ReadWriteForEncodingUtils.writeUnsignedVarInt(useMaxPointNumber.size(),
out);
- out.write(bitMap.getByteArray());
+ if (bitMapOfValueItselfOverflowInfo != null) {
+ // flag of value itself contains
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE - 1,
out);
+ } else {
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+ }
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(underflowFlags.size(),
out);
+ out.write(bitMapOfUnderflowInfo.getByteArray());
+ if (bitMapOfValueItselfOverflowInfo != null) {
+ out.write(bitMapOfValueItselfOverflowInfo.getByteArray());
+ }
out.write(ba);
}
reset();
@@ -158,12 +195,12 @@ public class FloatEncoder extends Encoder {
private void reset() {
isMaxPointNumberSaved = false;
- useMaxPointNumber.clear();
+ underflowFlags.clear();
}
- private boolean pointsNotUseMaxPointNumber() {
- for (boolean info : useMaxPointNumber) {
- if (!info) {
+ private Boolean hasOverflow() {
+ for (Boolean flag : underflowFlags) {
+ if (flag == null || !flag) {
return true;
}
}
diff --git
a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
index 417a4e0b..6f56b584 100644
---
a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
+++
b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -34,7 +34,6 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
-import static org.apache.tsfile.utils.EncodingUtils.roundWithGivenPrecision;
import static org.junit.Assert.assertEquals;
public class FloatDecoderTest {
@@ -208,32 +207,44 @@ public class FloatDecoderTest {
public void testBigFloat() throws Exception {
float a = 0.333F;
float b = 6.5536403E8F;
+ float c = 3.123456768E20F;
+ float d = Float.NaN;
Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT,
2);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
encoder.encode(a, baos);
encoder.encode(b, baos);
+ encoder.encode(c, baos);
+ encoder.encode(d, baos);
encoder.flush(baos);
ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
- assertEquals(roundWithGivenPrecision(a, 2), decoder.readFloat(buffer),
delta);
- assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer),
delta);
+ assertEquals(a, decoder.readFloat(buffer), 0.01);
+ assertEquals(b, decoder.readFloat(buffer), 0.01);
+ assertEquals(c, decoder.readFloat(buffer), 0.01);
+ assertEquals(d, decoder.readFloat(buffer), 0.01);
}
@Test
public void testBigDouble() throws Exception {
double a = 0.333;
double b = 9.223372036854E18;
+ double c = 9.223372036854E100;
+ double d = Double.NaN;
Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
encoder.encode(a, baos);
encoder.encode(b, baos);
+ encoder.encode(c, baos);
+ encoder.encode(d, baos);
encoder.flush(baos);
ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
- assertEquals(roundWithGivenPrecision(a, 2), decoder.readDouble(buffer),
delta);
- assertEquals(roundWithGivenPrecision(b, 2), decoder.readDouble(buffer),
delta);
+ assertEquals(a, decoder.readDouble(buffer), 0.01);
+ assertEquals(b, decoder.readDouble(buffer), 0.01);
+ assertEquals(c, decoder.readDouble(buffer), 0.01);
+ assertEquals(d, decoder.readDouble(buffer), 0.01);
}
// private void testDecimalLenght(TSEncoding encoding, List<Double>
valueList,
diff --git
a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
deleted file mode 100644
index 32dd29e4..00000000
--- a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.tsfile.utils;
-
-public class EncodingUtils {
-
- // Copied from org.apache.iotdb.db.utils.MathUtils
- public static float roundWithGivenPrecision(float data, int size) {
- if (size == 0) {
- return Math.round(data);
- }
- return Math.round(data)
- + Math.round(((data - Math.round(data)) * Math.pow(10, size))) /
(float) Math.pow(10, size);
- }
-
- // Copied from org.apache.iotdb.db.utils.MathUtils
- public static double roundWithGivenPrecision(double data, int size) {
- if (size == 0) {
- return Math.round(data);
- }
- return Math.round(data)
- + Math.round(((data - Math.round(data)) * Math.pow(10, size))) /
Math.pow(10, size);
- }
-}