(tsfile) branch colin_fix_config updated: add set config.

colinlee Mon, 12 May 2025 22:53:52 -0700

This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch colin_fix_config
in repository https://gitbox.apache.org/repos/asf/tsfile.git



The following commit(s) were added to refs/heads/colin_fix_config by this push:
     new b0dcb7d5 add set config.
b0dcb7d5 is described below

commit b0dcb7d50e25c3e435362a7217d0d0ab2cb409d5
Author: ColinLee <[email protected]>
AuthorDate: Tue May 13 13:52:59 2025 +0800

    add set config.
---
 cpp/src/common/global.h             | 27 ++++++++++++++++++++++
 python/tests/test_write_and_read.py | 16 +++++++++----
 python/tsfile/tsfile_cpp.pxd        | 13 +++++++++--
 python/tsfile/tsfile_py_cpp.pyx     | 45 ++++++++++++++++++++++++++++++++++++-
 python/tsfile/tsfile_writer.pyx     |  2 +-
 5 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h
index 9d79946b..eaccb733 100644
--- a/cpp/src/common/global.h
+++ b/cpp/src/common/global.h
@@ -45,6 +45,33 @@ FORCE_INLINE void set_global_time_compression(uint8_t 
compression) {
         static_cast<CompressionType>(compression);
 }
 
+FORCE_INLINE void set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
+    TSDataType dtype = static_cast<TSDataType>(data_type);
+    TSEncoding encoding_type = static_cast<TSEncoding>(encoding);
+    ASSERT(encoding >= PLAIN && encoding <= FREQ);
+    switch (dtype) {
+        case BOOLEAN:
+            g_config_value_.boolean_encoding_type_ = encoding_type;
+        case INT32:
+            g_config_value_.int32_encoding_type_ = encoding_type;
+        case INT64:
+            g_config_value_.int64_encoding_type_ = encoding_type;
+        case STRING:
+            g_config_value_.string_encoding_type_ = encoding_type;
+        case FLOAT:
+            g_config_value_.float_encoding_type_ = encoding_type;
+        case DOUBLE:
+            g_config_value_.double_encoding_type_ = encoding_type;
+        default:
+            // Do nothing.
+    }
+}
+
+FORCE_INLINE void set_global_compression(uint8_t compression) {
+    ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
+    g_config_value_.default_compression_type_ = 
static_cast<CompressionType>(compression);
+}
+
 extern int init_common();
 extern bool is_timestamp_column_name(const char *time_col_name);
 extern void cols_to_json(ByteStream *byte_stream,
diff --git a/python/tests/test_write_and_read.py 
b/python/tests/test_write_and_read.py
index 161ca54b..35e06087 100644
--- a/python/tests/test_write_and_read.py
+++ b/python/tests/test_write_and_read.py
@@ -20,7 +20,7 @@ import os
 
 import pytest
 
-from tsfile import ColumnSchema, TableSchema
+from tsfile import ColumnSchema, TableSchema, TSEncoding
 from tsfile import TSDataType
 from tsfile import Tablet, RowRecord, Field
 from tsfile import TimeseriesSchema
@@ -60,7 +60,7 @@ def test_row_record_write_and_read():
         if os.path.exists("record_write_and_read.tsfile"):
             os.remove("record_write_and_read.tsfile")
 
-
[email protected](reason="API not match")
 def test_tablet_write_and_read():
     try:
         if os.path.exists("record_write_and_read.tsfile"):
@@ -93,6 +93,8 @@ def test_tablet_write_and_read():
         while result.next():
             assert result.is_null_by_index(1) == False
             assert result.get_value_by_index(1) == row_num
+            # Here, the data retrieval uses the table model's API,
+            # which might be incompatible. Therefore, it is better to skip it 
for now.
             assert result.get_value_by_name("level0") == row_num
             row_num = row_num + 1
 
@@ -219,12 +221,12 @@ def test_tsfile_config():
     from tsfile import get_tsfile_config, set_tsfile_config
 
     config = get_tsfile_config()
-    assert config["chunk_group_size_threshold_"] == 0
 
     table = TableSchema("tEst_Table",
                         [ColumnSchema("Device", TSDataType.STRING, 
ColumnCategory.TAG),
                          ColumnSchema("vAlue", TSDataType.DOUBLE, 
ColumnCategory.FIELD)])
-    os.remove("test1.tsfile")
+    if os.path.exists("test1.tsfile"):
+        os.remove("test1.tsfile")
     with TsFileTableWriter("test1.tsfile", table) as writer:
         tablet = Tablet(["device", "VALUE"], [TSDataType.STRING, 
TSDataType.DOUBLE])
         for i in range(100):
@@ -255,3 +257,9 @@ def test_tsfile_config():
         set_tsfile_config({"time_compress_type_": TSDataType.DOUBLE})
     with pytest.raises(TypeError):
         set_tsfile_config({'chunk_group_size_threshold_': -1 * 100 * 20})
+
+    set_tsfile_config({'float_encoding_type_': TSEncoding.PLAIN})
+    assert get_tsfile_config()["float_encoding_type_"] == TSEncoding.PLAIN
+
+    with pytest.raises(TypeError):
+        set_tsfile_config({"float_encoding_type_": -1 * 100 * 20})
diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd
index e4782672..b639a9fd 100644
--- a/python/tsfile/tsfile_cpp.pxd
+++ b/python/tsfile/tsfile_cpp.pxd
@@ -196,7 +196,7 @@ cdef extern from "./tsfile_cwrapper.h":
 
 
 
-cdef extern from "./config.h" namespace "common":
+cdef extern from "./common/config/config.h" namespace "common":
     cdef cppclass ConfigValue:
         uint32_t tsblock_mem_inc_step_size_
         uint32_t tsblock_max_memory_
@@ -210,11 +210,20 @@ cdef extern from "./config.h" namespace "common":
         int32_t chunk_group_size_threshold_
         int32_t record_count_for_next_mem_check_
         bint encrypt_flag_
+        uint8_t boolean_encoding_type_;
+        uint8_t int32_encoding_type_;
+        uint8_t int64_encoding_type_;
+        uint8_t float_encoding_type_;
+        uint8_t double_encoding_type_;
+        uint8_t string_encoding_type_;
+        uint8_t default_compression_type_;
 
 cdef extern from "./common/global.h" namespace "common":
     ConfigValue g_config_value_
+    void set_datatype_encoding(uint8_t data_type, uint8_t encoding)
+    void set_global_compression(uint8_t compression)
 
 cdef extern from "./common/db_common.h" namespace "common":
     void set_global_time_data_type(uint8_t data_type);
     void set_global_time_encoding(uint8_t encoding);
-    void set_global_time_compression(uint8_t compression);
\ No newline at end of file
+    void set_global_time_compression(uint8_t compression);
diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx
index 55446ee7..d852a05c 100644
--- a/python/tsfile/tsfile_py_cpp.pyx
+++ b/python/tsfile/tsfile_py_cpp.pyx
@@ -360,7 +360,14 @@ cpdef object get_tsfile_config():
         "time_compress_type_": 
CompressorPy(int(g_config_value_.time_compress_type_)),
         "chunk_group_size_threshold_": 
g_config_value_.chunk_group_size_threshold_,
         
"record_count_for_next_mem_check_":g_config_value_.record_count_for_next_mem_check_,
-        "encrypt_flag_":g_config_value_.encrypt_flag_
+        "encrypt_flag_":g_config_value_.encrypt_flag_,
+        
"boolean_encoding_type_":TSEncodingPy(int(g_config_value_.boolean_encoding_type_)),
+        "int32_encoding_type_": 
TSEncodingPy(int(g_config_value_.int32_encoding_type_)),
+        "int64_encoding_type_": 
TSEncodingPy(int(g_config_value_.int64_encoding_type_)),
+        "float_encoding_type_": 
TSEncodingPy(int(g_config_value_.float_encoding_type_)),
+        "double_encoding_type_": 
TSEncodingPy(int(g_config_value_.double_encoding_type_)),
+        "string_encoding_type_": 
TSEncodingPy(int(g_config_value_.string_encoding_type_)),
+        "default_compression_type_": 
CompressorPy(int(g_config_value_.default_compression_type_)),
     }
 
 
@@ -405,6 +412,42 @@ cpdef void set_tsfile_config(dict new_config):
         _check_bool(new_config["encrypt_flag_"])
         g_config_value_.encrypt_flag_ = <bint>new_config["encrypt_flag_"]
 
+    if "boolean_encoding_type_" in new_config:
+        if not isinstance(new_config["boolean_encoding_type_"], TSEncodingPy):
+            raise TypeError(f"Unsupported TSEncodingType: 
{new_config['boolean_encoding_type_']}")
+        set_datatype_encoding(TSDataTypePy.BOOLEAN.value, 
new_config['boolean_encoding_type_'].value)
+
+    if "int32_encoding_type_" in new_config:
+        if not isinstance(new_config["int32_encoding_type_"], TSEncodingPy):
+            raise TypeError(f"Unsupported TSEncodingType: 
{new_config['int32_encoding_type_']}")
+        set_datatype_encoding(TSDataTypePy.INT32.value, 
new_config['int32_encoding_type_'].value)
+
+    if "int64_encoding_type_" in new_config:
+        if not isinstance(new_config["int64_encoding_type_"], TSEncodingPy):
+            raise TypeError(f"Unsupported TSEncodingType: 
{new_config['int64_encoding_type_']}")
+        set_datatype_encoding(TSDataTypePy.INT64.value, 
new_config['int64_encoding_type_'].value)
+
+    if "float_encoding_type_" in new_config:
+        if not isinstance(new_config["float_encoding_type_"], TSEncodingPy):
+            raise TypeError(f"Unsupported TSEncodingType: 
{new_config['float_encoding_type_']}")
+        set_datatype_encoding(TSDataTypePy.FLOAT.value, 
new_config['float_encoding_type_'].value)
+
+    if "double_encoding_type_" in new_config:
+        if not isinstance(new_config["double_encoding_type_"], TSEncodingPy):
+            raise TypeError(f"Unsupported TSEncodingType: 
{new_config['double_encoding_type_']}")
+        set_datatype_encoding(TSDataTypePy.DOUBLE.value, 
new_config['double_encoding_type_'].value)
+
+    if "string_encoding_type_" in new_config:
+        if not isinstance(new_config["string_encoding_type_"], TSEncodingPy):
+            raise TypeError(f"Unsupported TSEncodingType: 
{new_config['string_encoding_type_']}")
+        set_datatype_encoding(TSDataTypePy.STRING.value, 
new_config['string_encoding_type_'].value)
+
+    if "default_compression_type_" in new_config:
+        if not isinstance(new_config["default_compression_type_"], 
CompressorPy):
+            raise TypeError(f"Unsupported CompressionType: 
{new_config['default_compression_type_']}")
+        set_global_compression(new_config["default_compression_type_"].value)
+
+
 cdef _check_uint32(value):
     if not isinstance(value, int) or value < 0 or value > 0xFFFFFFFF:
         raise TypeError(f"Expected uint32, got {type(value)}")
diff --git a/python/tsfile/tsfile_writer.pyx b/python/tsfile/tsfile_writer.pyx
index 8db496cd..20199195 100644
--- a/python/tsfile/tsfile_writer.pyx
+++ b/python/tsfile/tsfile_writer.pyx
@@ -29,7 +29,7 @@ from tsfile.tablet import Tablet as TabletPy
 cdef class TsFileWriterPy:
     cdef TsFileWriter writer
 
-    def __init__(self, pathname:str, memory_threshold:int):
+    def __init__(self, pathname:str, memory_threshold:int = 128 * 1024 * 1024):
         self.writer = tsfile_writer_new_c(pathname, memory_threshold)
 
     def register_timeseries(self, device_name : str, timeseries_schema : 
TimeseriesSchemaPy):

(tsfile) branch colin_fix_config updated: add set config.

Reply via email to