This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new a5fee938 [CPP/C] implement default encoding/compression configuration interface (#582)
a5fee938 is described below
commit a5fee938a9142cf9d91801a53aef6858dbfe5f92
Author: Hongzhi Gao <[email protected]>
AuthorDate: Mon Sep 1 17:16:42 2025 +0800
[CPP/C] implement default encoding/compression configuration interface (#582)
* [CPP/C] implement default encoding/compression configuration interface
* [CPP/C] implement default encoding/compression configuration interface(c demo)
* [CPP/C] implement default encoding/compression configuration interface(c demo)
* [CPP/C] implement default encoding/compression configuration interface(c demo)
* mvn spotless:apply -P with-cpp
* [CPP/C] implement get configuration interface
---
cpp/examples/c_examples/demo_write.c | 8 ++
cpp/src/common/global.h | 102 +++++++++++++------
cpp/src/cwrapper/tsfile_cwrapper.cc | 30 ++++++
cpp/src/cwrapper/tsfile_cwrapper.h | 73 ++++++++++++++
.../writer/table_view/tsfile_writer_table_test.cc | 112 +++++++++++++++++++++
5 files changed, 296 insertions(+), 29 deletions(-)
diff --git a/cpp/examples/c_examples/demo_write.c b/cpp/examples/c_examples/demo_write.c
index 444cbe66..326cfdcf 100644
--- a/cpp/examples/c_examples/demo_write.c
+++ b/cpp/examples/c_examples/demo_write.c
@@ -27,6 +27,14 @@
// This example shows you how to write tsfile.
ERRNO write_tsfile() {
ERRNO code = 0;
+ code = set_global_compression(TS_COMPRESSION_LZ4);
+ if (code != RET_OK) {
+ return code;
+ }
+ code = set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF);
+ if (code != RET_OK) {
+ return code;
+ }
char* table_name = "table1";
// Create table schema to describe a table in a tsfile.
diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h
index 50ca8c8a..564f30c6 100644
--- a/cpp/src/common/global.h
+++ b/cpp/src/common/global.h
@@ -40,7 +40,8 @@ FORCE_INLINE int set_global_time_data_type(uint8_t data_type) {
FORCE_INLINE int set_global_time_encoding(uint8_t encoding) {
ASSERT(encoding >= PLAIN && encoding <= FREQ);
- if (encoding != TS_2DIFF && encoding != PLAIN) {
+ if (encoding != TS_2DIFF && encoding != PLAIN && encoding != GORILLA &&
+ encoding != ZIGZAG && encoding != RLE && encoding != SPRINTZ) {
return E_NOT_SUPPORT;
}
g_config_value_.time_encoding_type_ = static_cast<TSEncoding>(encoding);
@@ -49,7 +50,8 @@ FORCE_INLINE int set_global_time_encoding(uint8_t encoding) {
FORCE_INLINE int set_global_time_compression(uint8_t compression) {
ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
- if (compression != UNCOMPRESSED && compression != LZ4) {
+ if (compression != UNCOMPRESSED && compression != SNAPPY &&
+ compression != GZIP && compression != LZO && compression != LZ4) {
return E_NOT_SUPPORT;
}
g_config_value_.time_compress_type_ =
@@ -58,51 +60,52 @@ FORCE_INLINE int set_global_time_compression(uint8_t compression) {
}
FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
- TSDataType dtype = static_cast<TSDataType>(data_type);
+ const TSDataType dtype = static_cast<TSDataType>(data_type);
+ const TSEncoding encoding_type = static_cast<TSEncoding>(encoding);
+
+ // Validate input parameters
ASSERT(dtype >= BOOLEAN && dtype <= STRING);
- TSEncoding encoding_type = static_cast<TSEncoding>(encoding);
- ASSERT(encoding >= PLAIN && encoding <= FREQ);
+ ASSERT(encoding >= PLAIN && encoding <= SPRINTZ);
+
+ // Check encoding support for each data type
switch (dtype) {
case BOOLEAN:
- if (encoding_type != PLAIN) {
- return E_NOT_SUPPORT;
- }
+ if (encoding_type != PLAIN) return E_NOT_SUPPORT;
g_config_value_.boolean_encoding_type_ = encoding_type;
break;
+
case INT32:
- if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
- encoding_type != GORILLA) {
- return E_NOT_SUPPORT;
- }
- g_config_value_.int32_encoding_type_ = encoding_type;
- break;
+ case DATE:
case INT64:
if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
- encoding_type != GORILLA) {
- return E_NOT_SUPPORT;
- }
- g_config_value_.int64_encoding_type_ = encoding_type;
- break;
- case STRING:
- if (encoding_type != PLAIN) {
+ encoding_type != GORILLA && encoding_type != ZIGZAG &&
+ encoding_type != RLE && encoding_type != SPRINTZ) {
return E_NOT_SUPPORT;
}
- g_config_value_.string_encoding_type_ = encoding_type;
+ dtype == INT32
+ ? g_config_value_.int32_encoding_type_ = encoding_type
+ : g_config_value_.int64_encoding_type_ = encoding_type;
break;
+
case FLOAT:
+ case DOUBLE:
if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
- encoding_type != GORILLA) {
+ encoding_type != GORILLA && encoding_type != SPRINTZ) {
return E_NOT_SUPPORT;
}
- g_config_value_.float_encoding_type_ = encoding_type;
+ dtype == FLOAT
+ ? g_config_value_.float_encoding_type_ = encoding_type
+ : g_config_value_.double_encoding_type_ = encoding_type;
break;
- case DOUBLE:
- if (encoding_type != PLAIN && encoding_type != TS_2DIFF &&
- encoding_type != GORILLA) {
+
+ case STRING:
+ case TEXT:
+ if (encoding_type != PLAIN && encoding_type != DICTIONARY) {
return E_NOT_SUPPORT;
}
- g_config_value_.double_encoding_type_ = encoding_type;
+ g_config_value_.string_encoding_type_ = encoding_type;
break;
+
default:
break;
}
@@ -111,7 +114,8 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
FORCE_INLINE int set_global_compression(uint8_t compression) {
ASSERT(compression >= UNCOMPRESSED && compression <= LZ4);
- if (compression != UNCOMPRESSED && compression != LZ4) {
+ if (compression != UNCOMPRESSED && compression != SNAPPY &&
+ compression != GZIP && compression != LZO && compression != LZ4) {
return E_NOT_SUPPORT;
}
g_config_value_.default_compression_type_ =
@@ -119,6 +123,46 @@ FORCE_INLINE int set_global_compression(uint8_t compression) {
return E_OK;
}
+FORCE_INLINE uint8_t get_global_time_encoding() {
+ return static_cast<uint8_t>(g_config_value_.time_encoding_type_);
+}
+
+FORCE_INLINE uint8_t get_global_time_compression() {
+ return static_cast<uint8_t>(g_config_value_.time_compress_type_);
+}
+
+FORCE_INLINE uint8_t get_datatype_encoding(uint8_t data_type) {
+ const TSDataType dtype = static_cast<TSDataType>(data_type);
+
+ // Validate input parameter
+ ASSERT(dtype >= BOOLEAN && dtype <= STRING);
+
+ switch (dtype) {
+ case BOOLEAN:
+ return static_cast<uint8_t>(g_config_value_.boolean_encoding_type_);
+ case INT32:
+ return static_cast<uint8_t>(g_config_value_.int32_encoding_type_);
+ case INT64:
+ return static_cast<uint8_t>(g_config_value_.int64_encoding_type_);
+ case FLOAT:
+ return static_cast<uint8_t>(g_config_value_.float_encoding_type_);
+ case DOUBLE:
+ return static_cast<uint8_t>(g_config_value_.double_encoding_type_);
+ case STRING:
+ case TEXT:
+ return static_cast<uint8_t>(g_config_value_.string_encoding_type_);
+ case DATE:
+ return static_cast<uint8_t>(g_config_value_.int64_encoding_type_);
+ default:
+ return static_cast<uint8_t>(
+ PLAIN); // Return default encoding for unknown types
+ }
+}
+
+FORCE_INLINE uint8_t get_global_compression() {
+ return static_cast<uint8_t>(g_config_value_.default_compression_type_);
+}
+
extern int init_common();
extern bool is_timestamp_column_name(const char *time_col_name);
extern void cols_to_json(ByteStream *byte_stream,
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index 7b09f26d..e6e15dd4 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -42,6 +42,36 @@ void init_tsfile_config() {
}
}
+uint8_t get_global_time_encoding() {
+ return common::get_global_time_encoding();
+}
+
+uint8_t get_global_time_compression() {
+ return common::get_global_time_compression();
+}
+
+uint8_t get_datatype_encoding(uint8_t data_type) {
+ return common::get_datatype_encoding(data_type);
+}
+
+uint8_t get_global_compression() { return common::get_global_compression(); }
+
+int set_global_time_encoding(uint8_t encoding) {
+ return common::set_global_time_encoding(encoding);
+}
+
+int set_global_time_compression(uint8_t compression) {
+ return common::set_global_time_compression(compression);
+}
+
+int set_datatype_encoding(uint8_t data_type, uint8_t encoding) {
+ return common::set_datatype_encoding(data_type, encoding);
+}
+
+int set_global_compression(uint8_t compression) {
+ return common::set_global_compression(compression);
+}
+
WriteFile write_file_new(const char *pathname, ERRNO *err_code) {
int ret;
init_tsfile_config();
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h
index 1f651f5d..75dc0364 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.h
+++ b/cpp/src/cwrapper/tsfile_cwrapper.h
@@ -119,6 +119,79 @@ typedef void* ResultSet;
typedef int32_t ERRNO;
typedef int64_t Timestamp;
+/**
+ * @brief Get the encoding type for global time column
+ *
+ * @return uint8_t Time encoding type enum value (cast to uint8_t)
+ */
+uint8_t get_global_time_encoding();
+
+/**
+ * @brief Get the compression type for global time column
+ *
+ * @return uint8_t Time compression type enum value (cast to uint8_t)
+ */
+uint8_t get_global_time_compression();
+
+/**
+ * @brief Get the encoding type for specified data type
+ *
+ * @param data_type The data type to query encoding for
+ * @return uint8_t Encoding type enum value (cast to uint8_t)
+ */
+uint8_t get_datatype_encoding(uint8_t data_type);
+
+/**
+ * @brief Get the global default compression type
+ *
+ * @return uint8_t Compression type enum value (cast to uint8_t)
+ */
+uint8_t get_global_compression();
+
+/**
+ * @brief Sets the global time column encoding method
+ *
+ * Validates and sets the encoding type for time series timestamps.
+ * Supported encodings: TS_2DIFF, PLAIN, GORILLA, ZIGZAG, RLE, SPRINTZ
+ *
+ * @param encoding The encoding type to set (as uint8_t)
+ * @return int E_OK on success, E_NOT_SUPPORT for invalid encoding
+ */
+int set_global_time_encoding(uint8_t encoding);
+
+/**
+ * @brief Sets the global time column compression method
+ *
+ * Validates and sets the compression type for time series timestamps.
+ * Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4
+ *
+ * @param compression The compression type to set (as uint8_t)
+ * @return int E_OK on success, E_NOT_SUPPORT for invalid compression
+ */
+int set_global_time_compression(uint8_t compression);
+
+/**
+ * @brief Set encoding type for specific data type
+ * @param data_type The data type to configure
+ * @param encoding The encoding type to set
+ * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported for the
+ * data type
+ * @note Supported encodings per data type:
+ * - BOOLEAN: PLAIN only
+ * - INT32/INT64: PLAIN, TS_2DIFF, GORILLA, ZIGZAG, RLE, SPRINTZ
+ * - FLOAT/DOUBLE: PLAIN, TS_2DIFF, GORILLA, SPRINTZ
+ * - STRING: PLAIN, DICTIONARY
+ */
+int set_datatype_encoding(uint8_t data_type, uint8_t encoding);
+
+/**
+ * @brief Set the global default compression type
+ * @param compression Compression type to set
+ * @return E_OK if success, E_NOT_SUPPORT if compression is not supported
+ * @note Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4
+ */
+int set_global_compression(uint8_t compression);
+
/*--------------------------TsFile Reader and Writer------------------------ */
/**
diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
index 2bc9fd9a..8c373a3c 100644
--- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
+++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
@@ -987,3 +987,115 @@ TEST_F(TsFileWriterTableTest, DiffCodecTypes) {
ASSERT_EQ(reader.close(), common::E_OK);
delete[] literal;
}
+
+TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) {
+ // 1. Test setting global compression type
+ ASSERT_EQ(E_OK, set_global_compression(SNAPPY));
+
+ // 2. Test setting encoding types for different data types
+ ASSERT_EQ(E_OK, set_datatype_encoding(INT32, SPRINTZ));
+ ASSERT_EQ(E_OK, set_datatype_encoding(INT64, TS_2DIFF));
+ ASSERT_EQ(E_OK, set_datatype_encoding(FLOAT, GORILLA));
+ ASSERT_EQ(E_OK, set_datatype_encoding(DOUBLE, GORILLA));
+ ASSERT_EQ(E_OK, set_datatype_encoding(STRING, DICTIONARY));
+ ASSERT_EQ(E_OK, set_datatype_encoding(DATE, PLAIN)); // Added DATE support
+ ASSERT_EQ(E_OK,
+ set_datatype_encoding(TEXT, DICTIONARY)); // Added TEXT support
+
+ // 3. Create schema using these configurations
+ std::vector<MeasurementSchema*> measurement_schemas;
+ std::vector<ColumnCategory> column_categories;
+
+ std::vector<std::string> measurement_names = {
+ "int32_sprintz", "int64_ts2diff", "float_gorilla", "double_gorilla",
+ "string_dict", "date_plain", "text_dict"};
+
+ std::vector<common::TSDataType> data_types = {INT32, INT64, FLOAT, DOUBLE,
+ STRING, DATE, TEXT};
+
+ std::vector<common::TSEncoding> encodings = {
+ SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY, PLAIN, DICTIONARY};
+
+ // Create measurement schemas with configured encodings and compression
+ for (int i = 0; i < measurement_names.size(); i++) {
+ measurement_schemas.emplace_back(new MeasurementSchema(
+ measurement_names[i], data_types[i], encodings[i], SNAPPY));
+ column_categories.emplace_back(ColumnCategory::FIELD);
+ }
+
+ // 4. Write and verify data
+ auto table_schema = new TableSchema("configTestTable", measurement_schemas,
+ column_categories);
+ auto tsfile_table_writer =
+ std::make_shared<TsFileTableWriter>(&write_file_, table_schema);
+
+ // Create test data tablet
+ Tablet tablet(table_schema->get_measurement_names(),
+ table_schema->get_data_types(), 10);
+ char* literal = new char[std::strlen("test_str") + 1];
+ std::strcpy(literal, "test_str");
+ String literal_str(literal, std::strlen("test_str"));
+
+ // Prepare DATE and TEXT values
+ std::time_t now = std::time(nullptr);
+ std::tm* local_time = std::localtime(&now);
+ std::tm today = {};
+ today.tm_year = local_time->tm_year;
+ today.tm_mon = local_time->tm_mon;
+ today.tm_mday = local_time->tm_mday;
+ char* text_literal = new char[std::strlen("sample_text") + 1];
+ std::strcpy(text_literal, "sample_text");
+ String text_str(text_literal, std::strlen("sample_text"));
+
+ // Fill tablet with test values
+ for (int i = 0; i < 10; i++) {
+ tablet.add_timestamp(i, static_cast<int64_t>(i));
+ tablet.add_value(i, 0, (int32_t)32); // INT32 with SPRINTZ encoding
+ tablet.add_value(i, 1, (int64_t)64); // INT64 with TS_2DIFF encoding
+ tablet.add_value(i, 2, (float)1.0); // FLOAT with GORILLA encoding
+ tablet.add_value(i, 3, (double)2.0); // DOUBLE with GORILLA encoding
+ tablet.add_value(i, 4, literal_str); // STRING with DICTIONARY encoding
+ tablet.add_value(i, 5, today); // DATE with PLAIN encoding (added)
+ tablet.add_value(i, 6,
+ text_str); // TEXT with DICTIONARY encoding (added)
+ }
+
+ // Write and flush data
+ ASSERT_EQ(tsfile_table_writer->write_table(tablet), E_OK);
+ ASSERT_EQ(tsfile_table_writer->flush(), E_OK);
+ ASSERT_EQ(tsfile_table_writer->close(), E_OK);
+
+ // 5. Verify read data matches what was written
+ auto reader = TsFileReader();
+ reader.open(write_file_.get_file_path());
+ ResultSet* ret = nullptr;
+ int ret_value =
+ reader.query("configTestTable", measurement_names, 0, 10, ret);
+ ASSERT_EQ(common::E_OK, ret_value);
+
+ auto table_result_set = (TableResultSet*)ret;
+ bool has_next = false;
+ while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
+ // Verify all values were correctly encoded/decoded
+ ASSERT_EQ(table_result_set->get_value<int32_t>(2), 32); // INT32
+ ASSERT_EQ(table_result_set->get_value<int64_t>(3), 64); // INT64
+ ASSERT_FLOAT_EQ(table_result_set->get_value<float>(4), 1.0f); // FLOAT
+ ASSERT_DOUBLE_EQ(table_result_set->get_value<double>(5),
+ 2.0); // DOUBLE
+ ASSERT_EQ(table_result_set->get_value<common::String*>(6)->compare(
+ literal_str),
+ 0); // STRING
+ ASSERT_TRUE(DateConverter::is_tm_ymd_equal(
+ table_result_set->get_value<std::tm>(7), today));
+ ASSERT_EQ(
+ table_result_set->get_value<common::String*>(8)->compare(text_str),
+ 0); // TEXT (added)
+ }
+
+ // 6. Clean up resources
+ reader.destroy_query_data_set(table_result_set);
+ ASSERT_EQ(reader.close(), common::E_OK);
+ delete[] literal;
+ delete[] text_literal;
+ delete table_schema;
+}
\ No newline at end of file