This is an automated email from the ASF dual-hosted git repository.

haonan pushed a commit to branch rc/2.2.0
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit d7af333ea8a3deb0f0a77979d78d297ae01be5e8
Author: Hongzhi Gao <[email protected]>
AuthorDate: Mon Sep 1 17:16:42 2025 +0800

    [CPP/C] implement default encoding/compression configuration interface (#582)
    
    * [CPP/C] implement default encoding/compression configuration interface
    
    * [CPP/C] implement default encoding/compression configuration interface(c demo)
    
    * [CPP/C] implement default encoding/compression configuration interface(c demo)
    
    * [CPP/C] implement default encoding/compression configuration interface(c demo)
    
    * mvn spotless:apply -P with-cpp
    
    * [CPP/C] implement get configuration interface
---
 cpp/examples/c_examples/demo_write.c               |   8 ++
 cpp/src/common/global.h                            | 102 +++++++++++++------
 cpp/src/cwrapper/tsfile_cwrapper.cc                |  30 ++++++
 cpp/src/cwrapper/tsfile_cwrapper.h                 |  73 ++++++++++++++
 .../writer/table_view/tsfile_writer_table_test.cc  | 112 +++++++++++++++++++++
 5 files changed, 296 insertions(+), 29 deletions(-)

diff --git a/cpp/examples/c_examples/demo_write.c b/cpp/examples/c_examples/demo_write.c
index 444cbe66..326cfdcf 100644
--- a/cpp/examples/c_examples/demo_write.c
+++ b/cpp/examples/c_examples/demo_write.c
@@ -27,6 +27,14 @@
 // This example shows you how to write tsfile.
 ERRNO write_tsfile() {
     ERRNO code = 0;
+    code = set_global_compression(TS_COMPRESSION_LZ4);
+    if (code != RET_OK) {
+        return code;
+    }
+    code = set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF);
+    if (code != RET_OK) {
+        return code;
+    }
     char* table_name = "table1";
 
     // Create table schema to describe a table in a tsfile.
diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h
index 50ca8c8a..564f30c6 100644
--- a/cpp/src/common/global.h
+++ b/cpp/src/common/global.h
@@ -40,7 +40,8 @@ FORCE_INLINE int set_global_time_data_type(uint8_t data_type) {
 
 FORCE_INLINE int set_global_time_encoding(uint8_t encoding) {
     ASSERT(encoding >= PLAIN && encoding <= FREQ);
-    if (encoding != TS_2DIFF && encoding != PLAIN) {
+    if (encoding != TS_2DIFF && encoding != PLAIN && encoding != GORILLA &&
+        encoding != ZIGZAG && encoding != RLE && encoding != SPRINTZ) {
         return E_NOT_SUPPORT;
     }
     g_config_value_.time_encoding_type_ = static_cast<TSEncoding>(encoding);
@@ -49,7 +50,8 @@ FORCE_INLINE int set_global_time_encoding(uint8_t encoding) {
 
 FORCE_INLINE int set_global_time_compression(uint8_t compression) {
     ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
-    if (compression != UNCOMPRESSED && compression != LZ4) {
+    if (compression != UNCOMPRESSED && compression != SNAPPY &&
+        compression != GZIP && compression != LZO && compression != LZ4) {
         return E_NOT_SUPPORT;
     }
     g_config_value_.time_compress_type_ =
@@ -58,51 +60,52 @@ FORCE_INLINE int set_global_time_compression(uint8_t compression) {
 }
 
 FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
-    TSDataType dtype = static_cast<TSDataType>(data_type);
+    const TSDataType dtype = static_cast<TSDataType>(data_type);
+    const TSEncoding encoding_type = static_cast<TSEncoding>(encoding);
+
+    // Validate input parameters
     ASSERT(dtype >= BOOLEAN && dtype <= STRING);
-    TSEncoding encoding_type = static_cast<TSEncoding>(encoding);
-    ASSERT(encoding >= PLAIN && encoding <= FREQ);
+    ASSERT(encoding >= PLAIN && encoding <= SPRINTZ);
+
+    // Check encoding support for each data type
     switch (dtype) {
         case BOOLEAN:
-            if (encoding_type != PLAIN) {
-                return E_NOT_SUPPORT;
-            }
+            if (encoding_type != PLAIN) return E_NOT_SUPPORT;
             g_config_value_.boolean_encoding_type_ = encoding_type;
             break;
+
         case INT32:
-            if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
-                encoding_type != GORILLA) {
-                return E_NOT_SUPPORT;
-            }
-            g_config_value_.int32_encoding_type_ = encoding_type;
-            break;
+        case DATE:
         case INT64:
             if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
-                encoding_type != GORILLA) {
-                return E_NOT_SUPPORT;
-            }
-            g_config_value_.int64_encoding_type_ = encoding_type;
-            break;
-        case STRING:
-            if (encoding_type != PLAIN) {
+                encoding_type != GORILLA && encoding_type != ZIGZAG &&
+                encoding_type != RLE && encoding_type != SPRINTZ) {
                 return E_NOT_SUPPORT;
             }
-            g_config_value_.string_encoding_type_ = encoding_type;
+            dtype == INT32
+                ? g_config_value_.int32_encoding_type_ = encoding_type
+                : g_config_value_.int64_encoding_type_ = encoding_type;
             break;
+
         case FLOAT:
+        case DOUBLE:
             if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
-                encoding_type != GORILLA) {
+                encoding_type != GORILLA && encoding_type != SPRINTZ) {
                 return E_NOT_SUPPORT;
             }
-            g_config_value_.float_encoding_type_ = encoding_type;
+            dtype == FLOAT
+                ? g_config_value_.float_encoding_type_ = encoding_type
+                : g_config_value_.double_encoding_type_ = encoding_type;
             break;
-        case DOUBLE:
-            if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
-                encoding_type != GORILLA) {
+
+        case STRING:
+        case TEXT:
+            if (encoding_type != PLAIN && encoding_type != DICTIONARY) {
                 return E_NOT_SUPPORT;
             }
-            g_config_value_.double_encoding_type_ = encoding_type;
+            g_config_value_.string_encoding_type_ = encoding_type;
             break;
+
         default:
             break;
     }
@@ -111,7 +114,8 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
 
 FORCE_INLINE int set_global_compression(uint8_t compression) {
     ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
-    if (compression != UNCOMPRESSED && compression != LZ4) {
+    if (compression != UNCOMPRESSED && compression != SNAPPY &&
+        compression != GZIP && compression != LZO && compression != LZ4) {
         return E_NOT_SUPPORT;
     }
     g_config_value_.default_compression_type_ =
@@ -119,6 +123,46 @@ FORCE_INLINE int set_global_compression(uint8_t compression) {
     return E_OK;
 }
 
+FORCE_INLINE uint8_t get_global_time_encoding() {
+    return static_cast<uint8_t>(g_config_value_.time_encoding_type_);
+}
+
+FORCE_INLINE uint8_t get_global_time_compression() {
+    return static_cast<uint8_t>(g_config_value_.time_compress_type_);
+}
+
+FORCE_INLINE uint8_t get_datatype_encoding(uint8_t data_type) {
+    const TSDataType dtype = static_cast<TSDataType>(data_type);
+
+    // Validate input parameter
+    ASSERT(dtype >= BOOLEAN && dtype <= STRING);
+
+    switch (dtype) {
+        case BOOLEAN:
+            return static_cast<uint8_t>(g_config_value_.boolean_encoding_type_);
+        case INT32:
+            return static_cast<uint8_t>(g_config_value_.int32_encoding_type_);
+        case INT64:
+            return static_cast<uint8_t>(g_config_value_.int64_encoding_type_);
+        case FLOAT:
+            return static_cast<uint8_t>(g_config_value_.float_encoding_type_);
+        case DOUBLE:
+            return static_cast<uint8_t>(g_config_value_.double_encoding_type_);
+        case STRING:
+        case TEXT:
+            return static_cast<uint8_t>(g_config_value_.string_encoding_type_);
+        case DATE:
+            return static_cast<uint8_t>(g_config_value_.int64_encoding_type_);
+        default:
+            return static_cast<uint8_t>(
+                PLAIN);  // Return default encoding for unknown types
+    }
+}
+
+FORCE_INLINE uint8_t get_global_compression() {
+    return static_cast<uint8_t>(g_config_value_.default_compression_type_);
+}
+
 extern int init_common();
 extern bool is_timestamp_column_name(const char *time_col_name);
 extern void cols_to_json(ByteStream *byte_stream,
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index 7b09f26d..e6e15dd4 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -42,6 +42,36 @@ void init_tsfile_config() {
     }
 }
 
+uint8_t get_global_time_encoding() {
+    return common::get_global_time_encoding();
+}
+
+uint8_t get_global_time_compression() {
+    return common::get_global_time_compression();
+}
+
+uint8_t get_datatype_encoding(uint8_t data_type) {
+    return common::get_datatype_encoding(data_type);
+}
+
+uint8_t get_global_compression() { return common::get_global_compression(); }
+
+int set_global_time_encoding(uint8_t encoding) {
+    return common::set_global_time_encoding(encoding);
+}
+
+int set_global_time_compression(uint8_t compression) {
+    return common::set_global_time_compression(compression);
+}
+
+int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
+    return common::set_datatype_encoding(data_type, encoding);
+}
+
+int set_global_compression(uint8_t compression) {
+    return common::set_global_compression(compression);
+}
+
 WriteFile write_file_new(const char *pathname, ERRNO *err_code) {
     int ret;
     init_tsfile_config();
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h
index 1f651f5d..75dc0364 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.h
+++ b/cpp/src/cwrapper/tsfile_cwrapper.h
@@ -119,6 +119,79 @@ typedef void* ResultSet;
 typedef int32_t ERRNO;
 typedef int64_t Timestamp;
 
+/**
+ * @brief Get the encoding type for global time column
+ *
+ * @return uint8_t Time encoding type enum value (cast to uint8_t)
+ */
+uint8_t get_global_time_encoding();
+
+/**
+ * @brief Get the compression type for global time column
+ *
+ * @return uint8_t Time compression type enum value (cast to uint8_t)
+ */
+uint8_t get_global_time_compression();
+
+/**
+ * @brief Get the encoding type for specified data type
+ *
+ * @param data_type The data type to query encoding for
+ * @return uint8_t Encoding type enum value (cast to uint8_t)
+ */
+uint8_t get_datatype_encoding(uint8_t data_type);
+
+/**
+ * @brief Get the global default compression type
+ *
+ * @return uint8_t Compression type enum value (cast to uint8_t)
+ */
+uint8_t get_global_compression();
+
+/**
+ * @brief Sets the global time column encoding method
+ *
+ * Validates and sets the encoding type for time series timestamps.
+ * Supported encodings: TS_2DIFF, PLAIN, GORILLA, ZIGZAG, RLE, SPRINTZ
+ *
+ * @param encoding The encoding type to set (as uint8_t)
+ * @return int E_OK on success, E_NOT_SUPPORT for invalid encoding
+ */
+int set_global_time_encoding(uint8_t encoding);
+
+/**
+ * @brief Sets the global time column compression method
+ *
+ * Validates and sets the compression type for time series timestamps.
+ * Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4
+ *
+ * @param compression The compression type to set (as uint8_t)
+ * @return int E_OK on success, E_NOT_SUPPORT for invalid compression
+ */
+int set_global_time_compression(uint8_t compression);
+
+/**
+ * @brief Set encoding type for specific data type
+ * @param data_type The data type to configure
+ * @param encoding The encoding type to set
+ * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported for the
+ * data type
+ * @note Supported encodings per data type:
+ *        - BOOLEAN: PLAIN only
+ *        - INT32/INT64: PLAIN, TS_2DIFF, GORILLA, ZIGZAG, RLE, SPRINTZ
+ *        - FLOAT/DOUBLE: PLAIN, TS_2DIFF, GORILLA, SPRINTZ
+ *        - STRING: PLAIN, DICTIONARY
+ */
+int set_datatype_encoding(uint8_t data_type, uint8_t encoding);
+
+/**
+ * @brief Set the global default compression type
+ * @param compression Compression type to set
+ * @return E_OK if success, E_NOT_SUPPORT if compression is not supported
+ * @note Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4
+ */
+int set_global_compression(uint8_t compression);
+
 /*--------------------------TsFile Reader and Writer------------------------ */
 
 /**
diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
index 2bc9fd9a..8c373a3c 100644
--- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
+++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
@@ -987,3 +987,115 @@ TEST_F(TsFileWriterTableTest, DiffCodecTypes) {
     ASSERT_EQ(reader.close(), common::E_OK);
     delete[] literal;
 }
+
+TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) {
+    // 1. Test setting global compression type
+    ASSERT_EQ(E_OK, set_global_compression(SNAPPY));
+
+    // 2. Test setting encoding types for different data types
+    ASSERT_EQ(E_OK, set_datatype_encoding(INT32, SPRINTZ));
+    ASSERT_EQ(E_OK, set_datatype_encoding(INT64, TS_2DIFF));
+    ASSERT_EQ(E_OK, set_datatype_encoding(FLOAT, GORILLA));
+    ASSERT_EQ(E_OK, set_datatype_encoding(DOUBLE, GORILLA));
+    ASSERT_EQ(E_OK, set_datatype_encoding(STRING, DICTIONARY));
+    ASSERT_EQ(E_OK, set_datatype_encoding(DATE, PLAIN));  // Added DATE support
+    ASSERT_EQ(E_OK,
+              set_datatype_encoding(TEXT, DICTIONARY));  // Added TEXT support
+
+    // 3. Create schema using these configurations
+    std::vector<MeasurementSchema*> measurement_schemas;
+    std::vector<ColumnCategory> column_categories;
+
+    std::vector<std::string> measurement_names = {
+        "int32_sprintz", "int64_ts2diff", "float_gorilla", "double_gorilla",
+        "string_dict",   "date_plain",    "text_dict"};
+
+    std::vector<common::TSDataType> data_types = {INT32,  INT64, FLOAT, DOUBLE,
+                                                  STRING, DATE,  TEXT};
+
+    std::vector<common::TSEncoding> encodings = {
+        SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY, PLAIN, DICTIONARY};
+
+    // Create measurement schemas with configured encodings and compression
+    for (int i = 0; i < measurement_names.size(); i++) {
+        measurement_schemas.emplace_back(new MeasurementSchema(
+            measurement_names[i], data_types[i], encodings[i], SNAPPY));
+        column_categories.emplace_back(ColumnCategory::FIELD);
+    }
+
+    // 4. Write and verify data
+    auto table_schema = new TableSchema("configTestTable", measurement_schemas,
+                                        column_categories);
+    auto tsfile_table_writer =
+        std::make_shared<TsFileTableWriter>(&write_file_, table_schema);
+
+    // Create test data tablet
+    Tablet tablet(table_schema->get_measurement_names(),
+                  table_schema->get_data_types(), 10);
+    char* literal = new char[std::strlen("test_str") + 1];
+    std::strcpy(literal, "test_str");
+    String literal_str(literal, std::strlen("test_str"));
+
+    // Prepare DATE and TEXT values
+    std::time_t now = std::time(nullptr);
+    std::tm* local_time = std::localtime(&now);
+    std::tm today = {};
+    today.tm_year = local_time->tm_year;
+    today.tm_mon = local_time->tm_mon;
+    today.tm_mday = local_time->tm_mday;
+    char* text_literal = new char[std::strlen("sample_text") + 1];
+    std::strcpy(text_literal, "sample_text");
+    String text_str(text_literal, std::strlen("sample_text"));
+
+    // Fill tablet with test values
+    for (int i = 0; i < 10; i++) {
+        tablet.add_timestamp(i, static_cast<int64_t>(i));
+        tablet.add_value(i, 0, (int32_t)32);  // INT32 with SPRINTZ encoding
+        tablet.add_value(i, 1, (int64_t)64);  // INT64 with TS_2DIFF encoding
+        tablet.add_value(i, 2, (float)1.0);   // FLOAT with GORILLA encoding
+        tablet.add_value(i, 3, (double)2.0);  // DOUBLE with GORILLA encoding
+        tablet.add_value(i, 4, literal_str);  // STRING with DICTIONARY encoding
+        tablet.add_value(i, 5, today);  // DATE with PLAIN encoding (added)
+        tablet.add_value(i, 6,
+                         text_str);  // TEXT with DICTIONARY encoding (added)
+    }
+
+    // Write and flush data
+    ASSERT_EQ(tsfile_table_writer->write_table(tablet), E_OK);
+    ASSERT_EQ(tsfile_table_writer->flush(), E_OK);
+    ASSERT_EQ(tsfile_table_writer->close(), E_OK);
+
+    // 5. Verify read data matches what was written
+    auto reader = TsFileReader();
+    reader.open(write_file_.get_file_path());
+    ResultSet* ret = nullptr;
+    int ret_value =
+        reader.query("configTestTable", measurement_names, 0, 10, ret);
+    ASSERT_EQ(common::E_OK, ret_value);
+
+    auto table_result_set = (TableResultSet*)ret;
+    bool has_next = false;
+    while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
+        // Verify all values were correctly encoded/decoded
+        ASSERT_EQ(table_result_set->get_value<int32_t>(2), 32);        // INT32
+        ASSERT_EQ(table_result_set->get_value<int64_t>(3), 64);        // INT64
+        ASSERT_FLOAT_EQ(table_result_set->get_value<float>(4), 1.0f);  // FLOAT
+        ASSERT_DOUBLE_EQ(table_result_set->get_value<double>(5),
+                         2.0);  // DOUBLE
+        ASSERT_EQ(table_result_set->get_value<common::String*>(6)->compare(
+                      literal_str),
+                  0);  // STRING
+        ASSERT_TRUE(DateConverter::is_tm_ymd_equal(
+            table_result_set->get_value<std::tm>(7), today));
+        ASSERT_EQ(
+            table_result_set->get_value<common::String*>(8)->compare(text_str),
+            0);  // TEXT (added)
+    }
+
+    // 6. Clean up resources
+    reader.destroy_query_data_set(table_result_set);
+    ASSERT_EQ(reader.close(), common::E_OK);
+    delete[] literal;
+    delete[] text_literal;
+    delete table_schema;
+}
\ No newline at end of file

Reply via email to