This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch support_dict_for_string in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit fdc362e846d63f768ac698b36edaf77fae2a6512 Author: Tian Jiang <[email protected]> AuthorDate: Wed Sep 11 09:48:27 2024 +0800 Support dictionary encoding for STRING data type. Move datatype-encoding map into TsFile. --- .../tsfile/encoding/encoder/TSEncodingBuilder.java | 2 +- .../tsfile/file/metadata/enums/TSEncoding.java | 60 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java index 01b1daa8..68c7e56b 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java @@ -370,7 +370,7 @@ public abstract class TSEncodingBuilder { @Override public Encoder getEncoder(TSDataType type) { - if (type == TSDataType.TEXT) { + if (type == TSDataType.TEXT || type == TSDataType.STRING) { return new DictionaryEncoder(); } throw new UnSupportedDataTypeException("DICTIONARY doesn't support data type: " + type); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java index 43de4b8f..77482c6e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java @@ -19,6 +19,13 @@ package org.apache.tsfile.file.metadata.enums; +import org.apache.tsfile.enums.TSDataType; + +import java.util.EnumMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + public enum TSEncoding { PLAIN((byte) 0), DICTIONARY((byte) 1), @@ -37,6 +44,55 @@ public enum TSEncoding { RLBE((byte) 13); private final byte type; + @SuppressWarnings("java:S2386") // used by other projects + public static final Map<TSDataType, Set<TSEncoding>> 
TYPE_SUPPORTED_ENCODINGS = + new EnumMap<>(TSDataType.class); + + static { + Set<TSEncoding> booleanSet = new HashSet<>(); + booleanSet.add(TSEncoding.PLAIN); + booleanSet.add(TSEncoding.RLE); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BOOLEAN, booleanSet); + + Set<TSEncoding> intSet = new HashSet<>(); + intSet.add(TSEncoding.PLAIN); + intSet.add(TSEncoding.RLE); + intSet.add(TSEncoding.TS_2DIFF); + intSet.add(TSEncoding.GORILLA); + intSet.add(TSEncoding.ZIGZAG); + intSet.add(TSEncoding.CHIMP); + intSet.add(TSEncoding.SPRINTZ); + intSet.add(TSEncoding.RLBE); + + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.INT32, intSet); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.INT64, intSet); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.TIMESTAMP, intSet); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DATE, intSet); + + Set<TSEncoding> floatSet = new HashSet<>(); + floatSet.add(TSEncoding.PLAIN); + floatSet.add(TSEncoding.RLE); + floatSet.add(TSEncoding.TS_2DIFF); + floatSet.add(TSEncoding.GORILLA_V1); + floatSet.add(TSEncoding.GORILLA); + floatSet.add(TSEncoding.CHIMP); + floatSet.add(TSEncoding.SPRINTZ); + floatSet.add(TSEncoding.RLBE); + + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.FLOAT, floatSet); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DOUBLE, floatSet); + + Set<TSEncoding> textSet = new HashSet<>(); + textSet.add(TSEncoding.PLAIN); + textSet.add(TSEncoding.DICTIONARY); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.TEXT, textSet); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.STRING, textSet); + + Set<TSEncoding> blobSet = new HashSet<>(); + blobSet.add(TSEncoding.PLAIN); + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BLOB, blobSet); + } + TSEncoding(byte type) { this.type = type; } @@ -84,6 +140,10 @@ public enum TSEncoding { } } + public boolean isSupported(TSDataType type, TSEncoding encoding) { + return TYPE_SUPPORTED_ENCODINGS.get(type).contains(encoding); + } + public static int getSerializedSize() { return Byte.BYTES; }
