This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch support_dict_for_string
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit fdc362e846d63f768ac698b36edaf77fae2a6512
Author: Tian Jiang <[email protected]>
AuthorDate: Wed Sep 11 09:48:27 2024 +0800

    Support dictionary encoding for STRING data type.
    Move datatype-encoding map into TsFile.
---
 .../tsfile/encoding/encoder/TSEncodingBuilder.java |  2 +-
 .../tsfile/file/metadata/enums/TSEncoding.java     | 60 ++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
index 01b1daa8..68c7e56b 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/TSEncodingBuilder.java
@@ -370,7 +370,7 @@ public abstract class TSEncodingBuilder {
 
     @Override
     public Encoder getEncoder(TSDataType type) {
-      if (type == TSDataType.TEXT) {
+      if (type == TSDataType.TEXT || type == TSDataType.STRING) {
         return new DictionaryEncoder();
       }
       throw new UnSupportedDataTypeException("DICTIONARY doesn't support data type: " + type);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
index 43de4b8f..77482c6e 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java
@@ -19,6 +19,13 @@
 
 package org.apache.tsfile.file.metadata.enums;
 
+import org.apache.tsfile.enums.TSDataType;
+
+import java.util.EnumMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
 public enum TSEncoding {
   PLAIN((byte) 0),
   DICTIONARY((byte) 1),
@@ -37,6 +44,55 @@ public enum TSEncoding {
   RLBE((byte) 13);
   private final byte type;
 
+  @SuppressWarnings("java:S2386") // used by other projects
+  public static final Map<TSDataType, Set<TSEncoding>> TYPE_SUPPORTED_ENCODINGS =
+      new EnumMap<>(TSDataType.class);
+
+  static {
+    Set<TSEncoding> booleanSet = new HashSet<>();
+    booleanSet.add(TSEncoding.PLAIN);
+    booleanSet.add(TSEncoding.RLE);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BOOLEAN, booleanSet);
+
+    Set<TSEncoding> intSet = new HashSet<>();
+    intSet.add(TSEncoding.PLAIN);
+    intSet.add(TSEncoding.RLE);
+    intSet.add(TSEncoding.TS_2DIFF);
+    intSet.add(TSEncoding.GORILLA);
+    intSet.add(TSEncoding.ZIGZAG);
+    intSet.add(TSEncoding.CHIMP);
+    intSet.add(TSEncoding.SPRINTZ);
+    intSet.add(TSEncoding.RLBE);
+
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.INT32, intSet);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.INT64, intSet);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.TIMESTAMP, intSet);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DATE, intSet);
+
+    Set<TSEncoding> floatSet = new HashSet<>();
+    floatSet.add(TSEncoding.PLAIN);
+    floatSet.add(TSEncoding.RLE);
+    floatSet.add(TSEncoding.TS_2DIFF);
+    floatSet.add(TSEncoding.GORILLA_V1);
+    floatSet.add(TSEncoding.GORILLA);
+    floatSet.add(TSEncoding.CHIMP);
+    floatSet.add(TSEncoding.SPRINTZ);
+    floatSet.add(TSEncoding.RLBE);
+
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.FLOAT, floatSet);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.DOUBLE, floatSet);
+
+    Set<TSEncoding> textSet = new HashSet<>();
+    textSet.add(TSEncoding.PLAIN);
+    textSet.add(TSEncoding.DICTIONARY);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.TEXT, textSet);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.STRING, textSet);
+
+    Set<TSEncoding> blobSet = new HashSet<>();
+    blobSet.add(TSEncoding.PLAIN);
+    TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BLOB, blobSet);
+  }
+
   TSEncoding(byte type) {
     this.type = type;
   }
@@ -84,6 +140,10 @@ public enum TSEncoding {
     }
   }
 
+  public boolean isSupported(TSDataType type, TSEncoding encoding) {
+    return TYPE_SUPPORTED_ENCODINGS.get(type).contains(encoding);
+  }
+
   public static int getSerializedSize() {
     return Byte.BYTES;
   }

Reply via email to