This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 654e5091 Fix float encoder overflow (#342)
654e5091 is described below
commit 654e50915eb148f57cdaf38aa8de5e22d564db2f
Author: Haonan <[email protected]>
AuthorDate: Wed Dec 25 12:11:05 2024 +0800
Fix float encoder overflow (#342)
* try one try
* Fix FloatEncoder overflow problem
* add more UT
* add more UT
---
.../tsfile/encoding/decoder/FloatDecoder.java | 29 +++++++++++--
.../tsfile/encoding/encoder/FloatEncoder.java | 50 ++++++++++++++++++++--
.../tsfile/encoding/decoder/FloatDecoderTest.java | 33 ++++++++++++++
.../org/apache/tsfile/utils/EncodingUtils.java | 41 ++++++++++++++++++
.../apache/tsfile/write/TsFileReadWriteTest.java | 9 +++-
5 files changed, 153 insertions(+), 9 deletions(-)
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
index 6b019930..8f2510a2 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -24,6 +24,7 @@ import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.exception.encoding.TsFileDecodingException;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.BitMap;
import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
import org.slf4j.Logger;
@@ -47,6 +48,9 @@ public class FloatDecoder extends Decoder {
/** flag that indicates whether we have read maxPointNumber and calculated maxPointValue. */
private boolean isMaxPointNumberRead;
+ private BitMap useMaxPointNumber;
+ private int position = 0;
+
public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
super(encodingType);
if (encodingType == TSEncoding.RLE) {
@@ -93,7 +97,8 @@ public class FloatDecoder extends Decoder {
public float readFloat(ByteBuffer buffer) {
readMaxPointValue(buffer);
int value = decoder.readInt(buffer);
- double result = value / maxPointValue;
+ double result = value / getMaxPointValue();
+ position++;
return (float) result;
}
@@ -101,13 +106,30 @@ public class FloatDecoder extends Decoder {
public double readDouble(ByteBuffer buffer) {
readMaxPointValue(buffer);
long value = decoder.readLong(buffer);
- return value / maxPointValue;
+ double result = value / getMaxPointValue();
+ position++;
+ return result;
+ }
+
+ private double getMaxPointValue() {
+ if (useMaxPointNumber == null) {
+ return maxPointValue;
+ } else {
+ return useMaxPointNumber.isMarked(position) ? maxPointValue : 1;
+ }
}
private void readMaxPointValue(ByteBuffer buffer) {
if (!isMaxPointNumberRead) {
int maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
- if (maxPointNumber <= 0) {
+ if (maxPointNumber == Integer.MAX_VALUE) {
+ int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ byte[] tmp = new byte[size / 8 + 1];
+ buffer.get(tmp, 0, size / 8 + 1);
+ useMaxPointNumber = new BitMap(size, tmp);
+ maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+ maxPointValue = Math.pow(10, maxPointNumber);
+ } else if (maxPointNumber <= 0) {
maxPointValue = 1;
} else {
maxPointValue = Math.pow(10, maxPointNumber);
@@ -153,5 +175,6 @@ public class FloatDecoder extends Decoder {
public void reset() {
this.decoder.reset();
this.isMaxPointNumberRead = false;
+ this.position = 0;
}
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
index adf328e1..dccb2a93 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -22,10 +22,13 @@ package org.apache.tsfile.encoding.encoder;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.exception.encoding.TsFileEncodingException;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.utils.BitMap;
import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
/**
* Encoder for float or double value using rle or two-diff according to following grammar.
@@ -49,11 +52,14 @@ public class FloatEncoder extends Encoder {
/** flag to check whether maxPointNumber is saved in the stream. */
private boolean isMaxPointNumberSaved;
+ private final List<Boolean> useMaxPointNumber;
+
public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int maxPointNumber) {
super(encodingType);
this.maxPointNumber = maxPointNumber;
- calculateMaxPonitNum();
+ calculateMaxPointNum();
isMaxPointNumberSaved = false;
+ useMaxPointNumber = new ArrayList<>();
if (encodingType == TSEncoding.RLE) {
if (dataType == TSDataType.FLOAT) {
encoder = new IntRleEncoder();
@@ -101,7 +107,7 @@ public class FloatEncoder extends Encoder {
encoder.encode(valueLong, out);
}
- private void calculateMaxPonitNum() {
+ private void calculateMaxPointNum() {
if (maxPointNumber <= 0) {
maxPointNumber = 0;
maxPointValue = 1;
@@ -111,21 +117,57 @@ public class FloatEncoder extends Encoder {
}
private int convertFloatToInt(float value) {
- return (int) Math.round(value * maxPointValue);
+ if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue < Integer.MIN_VALUE) {
+ useMaxPointNumber.add(false);
+ return Math.round(value);
+ } else {
+ useMaxPointNumber.add(true);
+ return (int) Math.round(value * maxPointValue);
+ }
}
private long convertDoubleToLong(double value) {
- return Math.round(value * maxPointValue);
+ if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue < Long.MIN_VALUE) {
+ useMaxPointNumber.add(false);
+ return Math.round(value);
+ } else {
+ useMaxPointNumber.add(true);
+ return Math.round(value * maxPointValue);
+ }
}
@Override
public void flush(ByteArrayOutputStream out) throws IOException {
encoder.flush(out);
+ if (pointsNotUseMaxPointNumber()) {
+ byte[] ba = out.toByteArray();
+ out.reset();
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+ BitMap bitMap = new BitMap(useMaxPointNumber.size());
+ for (int i = 0; i < useMaxPointNumber.size(); i++) {
+ if (useMaxPointNumber.get(i)) {
+ bitMap.mark(i);
+ }
+ }
+ ReadWriteForEncodingUtils.writeUnsignedVarInt(useMaxPointNumber.size(), out);
+ out.write(bitMap.getByteArray());
+ out.write(ba);
+ }
reset();
}
private void reset() {
isMaxPointNumberSaved = false;
+ useMaxPointNumber.clear();
+ }
+
+ private boolean pointsNotUseMaxPointNumber() {
+ for (boolean info : useMaxPointNumber) {
+ if (!info) {
+ return true;
+ }
+ }
+ return false;
}
private void saveMaxPointNumber(ByteArrayOutputStream out) {
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
index bdc1db9f..417a4e0b 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -34,6 +34,7 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import static org.apache.tsfile.utils.EncodingUtils.roundWithGivenPrecision;
import static org.junit.Assert.assertEquals;
public class FloatDecoderTest {
@@ -203,6 +204,38 @@ public class FloatDecoderTest {
}
}
+ @Test
+ public void testBigFloat() throws Exception {
+ float a = 0.333F;
+ float b = 6.5536403E8F;
+ Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 2);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ encoder.encode(a, baos);
+ encoder.encode(b, baos);
+ encoder.flush(baos);
+
+ ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+ Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
+ assertEquals(roundWithGivenPrecision(a, 2), decoder.readFloat(buffer), delta);
+ assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer), delta);
+ }
+
+ @Test
+ public void testBigDouble() throws Exception {
+ double a = 0.333;
+ double b = 9.223372036854E18;
+ Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ encoder.encode(a, baos);
+ encoder.encode(b, baos);
+ encoder.flush(baos);
+
+ ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+ Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
+ assertEquals(roundWithGivenPrecision(a, 2), decoder.readDouble(buffer), delta);
+ assertEquals(roundWithGivenPrecision(b, 2), decoder.readDouble(buffer), delta);
+ }
+
// private void testDecimalLenght(TSEncoding encoding, List<Double> valueList,
// int maxPointValue,
// boolean isDebug, int repeatCount) throws Exception {
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
new file mode 100644
index 00000000..32dd29e4
--- /dev/null
+++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.utils;
+
+public class EncodingUtils {
+
+ // Copied from org.apache.iotdb.db.utils.MathUtils
+ public static float roundWithGivenPrecision(float data, int size) {
+ if (size == 0) {
+ return Math.round(data);
+ }
+ return Math.round(data)
+ + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / (float) Math.pow(10, size);
+ }
+
+ // Copied from org.apache.iotdb.db.utils.MathUtils
+ public static double roundWithGivenPrecision(double data, int size) {
+ if (size == 0) {
+ return Math.round(data);
+ }
+ return Math.round(data)
+ + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / Math.pow(10, size);
+ }
+}
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
index d9add8de..7f74d162 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
@@ -134,8 +134,13 @@ public class TsFileReadWriteTest {
public void floatTest(TSEncoding encoding) throws IOException, WriteProcessException {
writeDataByTSRecord(
- TSDataType.FLOAT, (i) -> new FloatDataPoint("sensor_1", (float) i), encoding);
- readData((i, field, delta) -> assertEquals(i, field.getFloatV(), delta));
+ TSDataType.FLOAT,
+ (i) -> new FloatDataPoint("sensor_1", i % 2 == 0 ? 6.55364032E8F : i),
+ encoding);
+ readData(
+ (i, field, delta) ->
+ assertEquals(
+ encoding.toString(), i % 2 == 0 ? 6.55364032E8F : i, field.getFloatV(), delta));
}
@Test