This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch dev/1.1
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/dev/1.1 by this push:
new 99eef393 Fix float encoder overflow when float value itself over int
range (#412)
99eef393 is described below
commit 99eef39313f6b44d746984513327547f04f5954f
Author: Haonan <[email protected]>
AuthorDate: Tue Feb 18 19:05:56 2025 +0800
Fix float encoder overflow when float value itself over int range (#412)
---
.github/workflows/code-coverage.yml | 2 +-
.../tsfile/encoding/decoder/FloatDecoder.java | 50 +++++++++++-
.../tsfile/encoding/encoder/FloatEncoder.java | 89 ++++++++++++++++++++--
.../tsfile/encoding/decoder/FloatDecoderTest.java | 44 +++++++++++
4 files changed, 175 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/code-coverage.yml
b/.github/workflows/code-coverage.yml
index 5933b410..9a660dab 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -31,7 +31,7 @@ jobs:
- name: Generate code coverage reports
run: |
sudo apt-get install lcov
- ./mvnw -B -P with-java,with-cpp,with-python,with-code-coverage clean
verify
+ ./mvnw -B -P with-java,with-cpp,with-code-coverage clean verify
lcov --capture --directory cpp/target/build/test --output-file
cpp/target/build/test/coverage.info
lcov --remove cpp/target/build/test/coverage.info
'*/tsfile/cpp/test/*' --output-file cpp/target/build/test/coverage_filtered.info
genhtml cpp/target/build/test/coverage_filtered.info
--output-directory cpp/target/build/test/coverage_report
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
index 6b019930..56e44d3a 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -24,6 +24,7 @@ import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.exception.encoding.TsFileDecodingException;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.BitMap;
import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
import org.slf4j.Logger;
@@ -39,7 +40,7 @@ import java.nio.ByteBuffer;
public class FloatDecoder extends Decoder {
private static final Logger logger =
LoggerFactory.getLogger(FloatDecoder.class);
- private Decoder decoder;
+ private final Decoder decoder;
/** maxPointValue = 10^(maxPointNumber). maxPointNumber can be read from the
stream. */
private double maxPointValue;
@@ -47,6 +48,10 @@ public class FloatDecoder extends Decoder {
/** flag that indicates whether we have read maxPointNumber and calculated
maxPointValue. */
private boolean isMaxPointNumberRead;
+ private BitMap isUnderflowInfo;
+ private BitMap valueItselfOverflowInfo;
+ private int position = 0;
+
public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
super(encodingType);
if (encodingType == TSEncoding.RLE) {
@@ -93,7 +98,12 @@ public class FloatDecoder extends Decoder {
public float readFloat(ByteBuffer buffer) {
readMaxPointValue(buffer);
int value = decoder.readInt(buffer);
- double result = value / maxPointValue;
+ if (valueItselfOverflowInfo != null &&
valueItselfOverflowInfo.isMarked(position)) {
+ position++;
+ return Float.intBitsToFloat(value);
+ }
+ double result = value / getMaxPointValue();
+ position++;
return (float) result;
}
@@ -101,13 +111,44 @@ public class FloatDecoder extends Decoder {
public double readDouble(ByteBuffer buffer) {
readMaxPointValue(buffer);
long value = decoder.readLong(buffer);
- return value / maxPointValue;
+ if (valueItselfOverflowInfo != null &&
valueItselfOverflowInfo.isMarked(position)) {
+ position++;
+ return Double.longBitsToDouble(value);
+ }
+ double result = value / getMaxPointValue();
+ position++;
+ return result;
+ }
+
+ private double getMaxPointValue() {
+ if (isUnderflowInfo == null) {
+ return maxPointValue;
+ } else {
+ return isUnderflowInfo.isMarked(position) ? maxPointValue : 1;
+ }
}
private void readMaxPointValue(ByteBuffer buffer) {
if (!isMaxPointNumberRead) {
int maxPointNumber =
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
- if (maxPointNumber <= 0) {
+ if (maxPointNumber == Integer.MAX_VALUE) {
+ int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ byte[] tmp = new byte[size / 8 + 1];
+ buffer.get(tmp, 0, size / 8 + 1);
+ isUnderflowInfo = new BitMap(size, tmp);
+ maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ maxPointValue = Math.pow(10, maxPointNumber);
+ } else if (maxPointNumber == Integer.MAX_VALUE - 1) {
+ int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ byte[] tmp = new byte[size / 8 + 1];
+ buffer.get(tmp, 0, size / 8 + 1);
+ isUnderflowInfo = new BitMap(size, tmp);
+ tmp = new byte[size / 8 + 1];
+ buffer.get(tmp, 0, size / 8 + 1);
+ valueItselfOverflowInfo = new BitMap(size, tmp);
+ maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ maxPointValue = Math.pow(10, maxPointNumber);
+ } else if (maxPointNumber <= 0) {
maxPointValue = 1;
} else {
maxPointValue = Math.pow(10, maxPointNumber);
@@ -153,5 +194,6 @@ public class FloatDecoder extends Decoder {
public void reset() {
this.decoder.reset();
this.isMaxPointNumberRead = false;
+ this.position = 0;
}
}
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
index adf328e1..75d7b674 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -22,10 +22,13 @@ package org.apache.tsfile.encoding.encoder;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.exception.encoding.TsFileEncodingException;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.utils.BitMap;
import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
/**
* Encoder for float or double value using rle or two-diff according to
following grammar.
@@ -38,7 +41,7 @@ import java.io.IOException;
*/
public class FloatEncoder extends Encoder {
- private Encoder encoder;
+ private final Encoder encoder;
/** number for accuracy of decimal places. */
private int maxPointNumber;
@@ -49,11 +52,17 @@ public class FloatEncoder extends Encoder {
/** flag to check whether maxPointNumber is saved in the stream. */
private boolean isMaxPointNumberSaved;
+ // value * maxPointValue fits in the int/long range -> true
+ // value * maxPointValue overflows, but value itself fits -> false
+ // value itself overflows the range, or is NaN -> null
+ private final List<Boolean> underflowFlags;
+
public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int
maxPointNumber) {
super(encodingType);
this.maxPointNumber = maxPointNumber;
- calculateMaxPonitNum();
+ calculateMaxPointNum();
isMaxPointNumberSaved = false;
+ underflowFlags = new ArrayList<>();
if (encodingType == TSEncoding.RLE) {
if (dataType == TSDataType.FLOAT) {
encoder = new IntRleEncoder();
@@ -101,7 +110,7 @@ public class FloatEncoder extends Encoder {
encoder.encode(valueLong, out);
}
- private void calculateMaxPonitNum() {
+ private void calculateMaxPointNum() {
if (maxPointNumber <= 0) {
maxPointNumber = 0;
maxPointValue = 1;
@@ -111,21 +120,91 @@ public class FloatEncoder extends Encoder {
}
private int convertFloatToInt(float value) {
- return (int) Math.round(value * maxPointValue);
+ if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue <
Integer.MIN_VALUE) {
+ if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) {
+ underflowFlags.add(null);
+ return Float.floatToIntBits(value);
+ } else {
+ underflowFlags.add(false);
+ return Math.round(value);
+ }
+ } else {
+ if (Float.isNaN(value)) {
+ underflowFlags.add(null);
+ return Float.floatToIntBits(value);
+ } else {
+ underflowFlags.add(true);
+ return (int) Math.round(value * maxPointValue);
+ }
+ }
}
private long convertDoubleToLong(double value) {
- return Math.round(value * maxPointValue);
+ if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue <
Long.MIN_VALUE) {
+ if (value > Long.MAX_VALUE || value < Long.MIN_VALUE) {
+ underflowFlags.add(null);
+ return Double.doubleToLongBits(value);
+ } else {
+ underflowFlags.add(false);
+ return Math.round(value);
+ }
+ } else {
+ if (Double.isNaN(value)) {
+ underflowFlags.add(null);
+ return Double.doubleToLongBits(value);
+ } else {
+ underflowFlags.add(true);
+ return Math.round(value * maxPointValue);
+ }
+ }
}
@Override
public void flush(ByteArrayOutputStream out) throws IOException {
encoder.flush(out);
+ if (hasOverflow()) {
+ byte[] ba = out.toByteArray();
+ out.reset();
+ BitMap bitMapOfValueItselfOverflowInfo = null;
+ BitMap bitMapOfUnderflowInfo = new BitMap(underflowFlags.size());
+ for (int i = 0; i < underflowFlags.size(); i++) {
+ if (underflowFlags.get(i) == null) {
+ if (bitMapOfValueItselfOverflowInfo == null) {
+ bitMapOfValueItselfOverflowInfo = new
BitMap(underflowFlags.size());
+ }
+ bitMapOfValueItselfOverflowInfo.mark(i);
+ } else if (underflowFlags.get(i)) {
+ bitMapOfUnderflowInfo.mark(i);
+ }
+ }
+ if (bitMapOfValueItselfOverflowInfo != null) {
+ // marker value: a value-itself-overflow bitmap follows the underflow bitmap
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE - 1,
out);
+ } else {
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+ }
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(underflowFlags.size(),
out);
+ out.write(bitMapOfUnderflowInfo.getByteArray());
+ if (bitMapOfValueItselfOverflowInfo != null) {
+ out.write(bitMapOfValueItselfOverflowInfo.getByteArray());
+ }
+ out.write(ba);
+ }
reset();
}
private void reset() {
isMaxPointNumberSaved = false;
+ underflowFlags.clear();
+ }
+
+ private Boolean hasOverflow() {
+ for (Boolean flag : underflowFlags) {
+ if (flag == null || !flag) {
+ return true;
+ }
+ }
+ return false;
}
private void saveMaxPointNumber(ByteArrayOutputStream out) {
diff --git
a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
index bdc1db9f..6f56b584 100644
---
a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
+++
b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -203,6 +203,50 @@ public class FloatDecoderTest {
}
}
+ @Test
+ public void testBigFloat() throws Exception {
+ float a = 0.333F;
+ float b = 6.5536403E8F;
+ float c = 3.123456768E20F;
+ float d = Float.NaN;
+ Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT,
2);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ encoder.encode(a, baos);
+ encoder.encode(b, baos);
+ encoder.encode(c, baos);
+ encoder.encode(d, baos);
+ encoder.flush(baos);
+
+ ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+ Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
+ assertEquals(a, decoder.readFloat(buffer), 0.01);
+ assertEquals(b, decoder.readFloat(buffer), 0.01);
+ assertEquals(c, decoder.readFloat(buffer), 0.01);
+ assertEquals(d, decoder.readFloat(buffer), 0.01);
+ }
+
+ @Test
+ public void testBigDouble() throws Exception {
+ double a = 0.333;
+ double b = 9.223372036854E18;
+ double c = 9.223372036854E100;
+ double d = Double.NaN;
+ Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ encoder.encode(a, baos);
+ encoder.encode(b, baos);
+ encoder.encode(c, baos);
+ encoder.encode(d, baos);
+ encoder.flush(baos);
+
+ ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+ Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
+ assertEquals(a, decoder.readDouble(buffer), 0.01);
+ assertEquals(b, decoder.readDouble(buffer), 0.01);
+ assertEquals(c, decoder.readDouble(buffer), 0.01);
+ assertEquals(d, decoder.readDouble(buffer), 0.01);
+ }
+
// private void testDecimalLenght(TSEncoding encoding, List<Double>
valueList,
// int maxPointValue,
// boolean isDebug, int repeatCount) throws Exception {