This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch encoding_check in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit f64bc8768d88095fd640617b3eecf8a6850dbdfa Author: HTHou <[email protected]> AuthorDate: Thu Sep 18 17:14:42 2025 +0800 Fix camel encoding error message and add check for each datatypes of supported encoding --- .../tsfile/encoding/encoder/TSEncodingBuilder.java | 55 +++++++++--------- .../tsfile/file/metadata/enums/TSEncoding.java | 7 ++- .../encoding/encoder/TSEncodingBuilderTest.java | 65 ++++++++++++++++++++++ 3 files changed, 99 insertions(+), 28 deletions(-) diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java index 7849607c..38b0731d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java @@ -30,9 +30,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Map; +import java.util.Objects; /** - * Each subclass of TSEncodingBuilder responds a enumerate value in {@linkplain TSEncoding + * Each subclass of TSEncodingBuilder responds an enumerate value in {@linkplain TSEncoding * TSEncoding}, which stores several configuration related to responding encoding type to generate * {@linkplain Encoder Encoder} instance.<br> * Each TSEncoding has a responding TSEncodingBuilder. The design referring to visit pattern @@ -43,6 +44,7 @@ public abstract class TSEncodingBuilder { private static final Logger logger = LoggerFactory.getLogger(TSEncodingBuilder.class); protected final TSFileConfig conf; + private static final String ERROR_MSG = "%s doesn't support data type: %s"; protected TSEncodingBuilder() { this.conf = TSFileDescriptor.getInstance().getConfig(); @@ -64,8 +66,6 @@ public abstract class TSEncodingBuilder { return new Ts2Diff(); case GORILLA_V1: return new GorillaV1(); - case REGULAR: - return new Regular(); case GORILLA: return new GorillaV2(); case DICTIONARY: @@ -81,7 +81,7 @@ public abstract class TSEncodingBuilder { case CAMEL: return new Camel(); default: - throw new UnsupportedOperationException(type.toString()); + throw new UnsupportedOperationException("Unsupported encoding: " + type); } } @@ -124,7 +124,7 @@ public abstract class TSEncodingBuilder { if (props == null || !props.containsKey(Encoder.MAX_STRING_LENGTH)) { maxStringLength = TSFileDescriptor.getInstance().getConfig().getMaxStringLength(); } else { - maxStringLength = Integer.valueOf(props.get(Encoder.MAX_STRING_LENGTH)); + maxStringLength = Integer.parseInt(props.get(Encoder.MAX_STRING_LENGTH)); if (maxStringLength < 0) { maxStringLength = TSFileDescriptor.getInstance().getConfig().getMaxStringLength(); logger.warn( @@ -154,7 +154,7 @@ public abstract class TSEncodingBuilder { case DOUBLE: return new FloatEncoder(TSEncoding.RLE, type, maxPointNumber); default: - throw new UnSupportedDataTypeException("RLE doesn't support data type: " + type); + throw new UnSupportedDataTypeException(String.format(ERROR_MSG, TSEncoding.RLE, type)); } } @@ -209,15 +209,16 @@ public abstract class TSEncodingBuilder { case DOUBLE: return new FloatEncoder(TSEncoding.TS_2DIFF, type, maxPointNumber); default: - throw new UnSupportedDataTypeException("TS_2DIFF doesn't support data type: " + type); + throw new UnSupportedDataTypeException( + String.format(ERROR_MSG, TSEncoding.TS_2DIFF, type)); } } - @Override /** * TS_2DIFF could specify <b>max_point_number</b> in given JSON Object, which means the maximum * decimal digits for float or double data. */ + @Override public void initFromProps(Map<String, String> props) { // set max error from initialized map or default value if not set if (props == null || !props.containsKey(Encoder.MAX_POINT_NUMBER)) { @@ -257,13 +258,14 @@ public abstract class TSEncodingBuilder { case DOUBLE: return new DoublePrecisionEncoderV1(); default: - throw new UnSupportedDataTypeException("GORILLA_V1 doesn't support data type: " + type); + throw new UnSupportedDataTypeException( + String.format(ERROR_MSG, TSEncoding.GORILLA_V1, type)); } } @Override public void initFromProps(Map<String, String> props) { - // allowed do nothing + // allowed to do nothing } } @@ -272,17 +274,15 @@ public abstract class TSEncodingBuilder { @Override public Encoder getEncoder(TSDataType type) { - switch (type) { - case DOUBLE: - return new CamelEncoder(); - default: - throw new UnSupportedDataTypeException("GORILLA_V1 doesn't support data type: " + type); + if (Objects.requireNonNull(type) == TSDataType.DOUBLE) { + return new CamelEncoder(); } + throw new UnSupportedDataTypeException(String.format(ERROR_MSG, TSEncoding.CAMEL, type)); } @Override public void initFromProps(Map<String, String> props) { - // allowed do nothing + // allowed to do nothing } } @@ -299,13 +299,14 @@ public abstract class TSEncodingBuilder { case TIMESTAMP: return new RegularDataEncoder.LongRegularEncoder(); default: - throw new UnSupportedDataTypeException("REGULAR doesn't support data type: " + type); + throw new UnSupportedDataTypeException( + String.format(ERROR_MSG, TSEncoding.REGULAR, type)); } } @Override public void initFromProps(Map<String, String> props) { - // allowed do nothing + // allowed to do nothing } } @@ -326,13 +327,14 @@ public abstract class TSEncodingBuilder { case TIMESTAMP: return new LongGorillaEncoder(); default: - throw new UnSupportedDataTypeException("GORILLA doesn't support data type: " + type); + throw new UnSupportedDataTypeException( + String.format(ERROR_MSG, TSEncoding.GORILLA, type)); } } @Override public void initFromProps(Map<String, String> props) { - // allowed do nothing + // allowed to do nothing } } @@ -351,7 +353,8 @@ public abstract class TSEncodingBuilder { case DOUBLE: return new DoubleSprintzEncoder(); default: - throw new UnSupportedDataTypeException("Sprintz doesn't support data type: " + type); + throw new UnSupportedDataTypeException( + String.format(ERROR_MSG, TSEncoding.SPRINTZ, type)); } } @@ -377,7 +380,7 @@ public abstract class TSEncodingBuilder { case DOUBLE: return new DoubleRLBE(); default: - throw new UnSupportedDataTypeException("RLBE doesn't support data type: " + type); + throw new UnSupportedDataTypeException(String.format(ERROR_MSG, TSEncoding.RLBE, type)); } } @@ -394,7 +397,7 @@ public abstract class TSEncodingBuilder { if (type == TSDataType.TEXT || type == TSDataType.STRING) { return new DictionaryEncoder(); } - throw new UnSupportedDataTypeException("DICTIONARY doesn't support data type: " + type); + throw new UnSupportedDataTypeException(String.format(ERROR_MSG, TSEncoding.DICTIONARY, type)); } @Override @@ -415,7 +418,7 @@ public abstract class TSEncodingBuilder { case TIMESTAMP: return new LongZigzagEncoder(); default: - throw new UnSupportedDataTypeException("ZIGZAG doesn't support data type: " + type); + throw new UnSupportedDataTypeException(String.format(ERROR_MSG, TSEncoding.ZIGZAG, type)); } } @@ -442,13 +445,13 @@ public abstract class TSEncodingBuilder { case TIMESTAMP: return new LongChimpEncoder(); default: - throw new UnSupportedDataTypeException("CHIMP doesn't support data type: " + type); + throw new UnSupportedDataTypeException(String.format(ERROR_MSG, TSEncoding.CHIMP, type)); } } @Override public void initFromProps(Map<String, String> props) { - // allowed do nothing + // allowed to do nothing } } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java index d2ea1931..6c02cb1f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java @@ -79,10 +79,13 @@ public enum TSEncoding { floatSet.add(TSEncoding.CHIMP); floatSet.add(TSEncoding.SPRINTZ); floatSet.add(TSEncoding.RLBE); - floatSet.add(TSEncoding.CAMEL); TYPE_SUPPORTED_ENCODINGS.put(TSDataType.FLOAT, floatSet); - TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DOUBLE, floatSet); + + Set<TSEncoding> doubleSet = new HashSet<>(floatSet); + doubleSet.add(TSEncoding.CAMEL); + + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DOUBLE, doubleSet); Set<TSEncoding> textSet = new HashSet<>(); textSet.add(TSEncoding.PLAIN); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilderTest.java new file mode 100644 index 00000000..7a866a34 --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilderTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.encoding.encoder; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.write.UnSupportedDataTypeException; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; + +public class TSEncodingBuilderTest { + + private static final String ERROR_MSG = "Unsupported dataType: %s doesn't support data type: %s"; + + @Test + public void testTSEncodingBuilder() { + Set<TSDataType> supportedDataTypes = + Arrays.stream(TSDataType.values()).collect(Collectors.toSet()); + supportedDataTypes.remove(TSDataType.VECTOR); + supportedDataTypes.remove(TSDataType.UNKNOWN); + + for (TSDataType dataType : supportedDataTypes) { + for (TSEncoding encoding : TSEncoding.values()) { + if (TSEncoding.isSupported(dataType, encoding)) { + try { + TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); + } catch (UnSupportedDataTypeException e) { + Assert.fail(e.getMessage()); + } + } else { + try { + TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType); + Assert.fail(String.format(ERROR_MSG, encoding, dataType)); + } catch (UnsupportedOperationException e) { + Assert.assertEquals("Unsupported encoding: " + encoding, e.getMessage()); + } catch (UnSupportedDataTypeException e) { + Assert.assertEquals(String.format(ERROR_MSG, encoding, dataType), e.getMessage()); + } + } + } + } + } +}
