This is an automated email from the ASF dual-hosted git repository. colinlee pushed a commit to branch colin_python_V4 in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit a88cbca2fca90de8d662df0cdc24a52eb745d218 Author: Tian Jiang <[email protected]> AuthorDate: Fri Feb 28 12:16:13 2025 +0800 Fix losing first page and missing time column in result set metadata --- cpp/examples/c_examples/demo_read.c | 2 +- cpp/examples/c_examples/demo_write.c | 6 +++--- cpp/examples/cpp_examples/demo_read.cpp | 5 +++-- cpp/src/cwrapper/tsfile_cwrapper.cc | 12 ++++++------ cpp/src/reader/result_set.h | 18 ++++++++++++------ cpp/src/reader/table_result_set.cc | 5 +++-- cpp/src/writer/chunk_writer.cc | 11 +++++++++-- cpp/src/writer/chunk_writer.h | 8 +++++--- cpp/src/writer/time_chunk_writer.cc | 11 +++++++++-- cpp/src/writer/time_chunk_writer.h | 8 +++++--- cpp/src/writer/value_chunk_writer.cc | 11 +++++++++-- cpp/src/writer/value_chunk_writer.h | 8 +++++--- cpp/test/reader/table_view/tsfile_reader_table_test.cc | 12 +++++++----- cpp/test/reader/tsfile_reader_test.cc | 6 ++++-- 14 files changed, 81 insertions(+), 42 deletions(-) diff --git a/cpp/examples/c_examples/demo_read.c b/cpp/examples/c_examples/demo_read.c index 1636adbd..8e4e7acf 100644 --- a/cpp/examples/c_examples/demo_read.c +++ b/cpp/examples/c_examples/demo_read.c @@ -56,7 +56,7 @@ ERRNO read_tsfile() { Timestamp timestamp = tsfile_result_set_get_value_by_index_int64_t(ret, 1); printf("%ld ", timestamp); - for (int i = 1; i < column_num; i++) { + for (int i = 1; i <= column_num; i++) { if (tsfile_result_set_is_null_by_index(ret, i)) { printf(" null "); } else { diff --git a/cpp/examples/c_examples/demo_write.c b/cpp/examples/c_examples/demo_write.c index 78c134dc..fffe3f18 100644 --- a/cpp/examples/c_examples/demo_write.c +++ b/cpp/examples/c_examples/demo_write.c @@ -36,19 +36,19 @@ ERRNO write_tsfile() { table_schema.column_schemas = (ColumnSchema*)malloc(sizeof(ColumnSchema) * 3); table_schema.column_schemas[0] = - ColumnSchema{.column_name = strdup("id1"), + (ColumnSchema){.column_name = strdup("id1"), .data_type = TS_DATATYPE_STRING, .compression = TS_COMPRESSION_UNCOMPRESSED, .encoding = TS_ENCODING_PLAIN, .column_category = TAG}; table_schema.column_schemas[1] = - ColumnSchema{.column_name = strdup("id2"), + (ColumnSchema){.column_name = strdup("id2"), .data_type = TS_DATATYPE_STRING, .compression = TS_COMPRESSION_UNCOMPRESSED, .encoding = TS_ENCODING_PLAIN, .column_category = TAG}; table_schema.column_schemas[2] = - ColumnSchema{.column_name = strdup("s1"), + (ColumnSchema){.column_name = strdup("s1"), .data_type = TS_DATATYPE_INT32, .compression = TS_COMPRESSION_UNCOMPRESSED, .encoding = TS_ENCODING_PLAIN, diff --git a/cpp/examples/cpp_examples/demo_read.cpp b/cpp/examples/cpp_examples/demo_read.cpp index 4961fb04..4951e8ca 100644 --- a/cpp/examples/cpp_examples/demo_read.cpp +++ b/cpp/examples/cpp_examples/demo_read.cpp @@ -51,7 +51,7 @@ int demo_read() { // Metadata in query handler. auto metadata = ret->get_metadata(); int column_num = metadata->get_column_count(); - for (int i = 0; i < column_num; i++) { + for (int i = 1; i <= column_num; i++) { std::cout << "column name: " << metadata->get_column_name(i) << std::endl; std::cout << "column type: " << metadata->get_column_type(i) @@ -63,7 +63,7 @@ int demo_read() { while ((code = ret->next(has_next)) == common::E_OK && has_next) { // Timestamp at column 1 and column index begin from 1. Timestamp timestamp = ret->get_value<Timestamp>(1); - for (int i = 0; i < column_num; i++) { + for (int i = 1; i <= column_num; i++) { if (ret->is_null(i)) { std::cout << "null" << std::endl; } else { @@ -98,4 +98,5 @@ int demo_read() { // Close reader. reader.close(); + return 0; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 6fb10355..47a61642 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -337,27 +337,27 @@ ResultSetMetaData tsfile_result_set_get_metadata(ResultSet result_set) { malloc(meta_data.column_num * sizeof(TSDataType))); for (int i = 0; i < meta_data.column_num; i++) { meta_data.column_names[i] = - strdup(result_set_metadata->get_column_name(i).c_str()); + strdup(result_set_metadata->get_column_name(i + 1).c_str()); meta_data.data_types[i] = - static_cast<TSDataType>(result_set_metadata->get_column_type(i)); + static_cast<TSDataType>(result_set_metadata->get_column_type(i + 1)); } return meta_data; } char *tsfile_result_set_metadata_get_column_name(ResultSetMetaData result_set, uint32_t column_index) { - if (column_index >= result_set.column_num) { + if (column_index > result_set.column_num) { return nullptr; } - return result_set.column_names[column_index]; + return result_set.column_names[column_index - 1]; } TSDataType tsfile_result_set_metadata_get_data_type( ResultSetMetaData result_set, uint32_t column_index) { - if (column_index >= result_set.column_num) { + if (column_index > result_set.column_num) { return TS_DATATYPE_INVALID; } - return result_set.data_types[column_index]; + return result_set.data_types[column_index - 1]; } int tsfile_result_set_metadata_get_column_num(ResultSetMetaData result_set) { diff --git a/cpp/src/reader/result_set.h b/cpp/src/reader/result_set.h index 309af2ce..6f65acb7 100644 --- a/cpp/src/reader/result_set.h +++ b/cpp/src/reader/result_set.h @@ -29,15 +29,21 @@ namespace storage { class ResultSetMetadata { public: ResultSetMetadata(const std::vector<std::string>& column_names, - const std::vector<common::TSDataType>& column_types) - : column_names_(column_names), column_types_(column_types) {} + const std::vector<common::TSDataType>& column_types) { + this->column_names_.emplace_back("time"); + this->column_types_.emplace_back(common::INT64); + for (size_t i = 0; i < column_names.size(); ++i) { + this->column_names_.emplace_back(column_names[i]); + this->column_types_.emplace_back(column_types[i]); + } + } common::TSDataType get_column_type(uint32_t column_index) { - ASSERT(column_index >= 0 && column_index < column_types_.size()); - return column_types_[column_index]; + ASSERT(column_index >= 1 && column_index <= column_types_.size()); + return column_types_[column_index - 1]; } std::string get_column_name(uint32_t column_index) { - ASSERT(column_index >= 0 && column_index < column_names_.size()); - return column_names_[column_index]; + ASSERT(column_index >= 1 && column_index <= column_names_.size()); + return column_names_[column_index - 1]; } uint32_t get_column_count() { return column_names_.size(); } diff --git a/cpp/src/reader/table_result_set.cc b/cpp/src/reader/table_result_set.cc index 6cf6d7e3..f520a37d 100644 --- a/cpp/src/reader/table_result_set.cc +++ b/cpp/src/reader/table_result_set.cc @@ -72,12 +72,13 @@ bool TableResultSet::is_null(const std::string& column_name) { if (iter == index_lookup_.end()) { return true; } else { - return is_null(iter->second); + return is_null(iter->second + 1); } } bool TableResultSet::is_null(uint32_t column_index) { - return row_record_->get_field(column_index) == nullptr; + ASSERT(1 <= column_index && column_index <= row_record_->get_col_num()); + return row_record_->get_field(column_index - 1) == nullptr; } RowRecord* TableResultSet::get_row_record() { diff --git a/cpp/src/writer/chunk_writer.cc b/cpp/src/writer/chunk_writer.cc index ec2f98b7..888692fb 100644 --- a/cpp/src/writer/chunk_writer.cc +++ b/cpp/src/writer/chunk_writer.cc @@ -127,9 +127,9 @@ void ChunkWriter::save_first_page_data(PageWriter &first_page_writer) { first_page_statistic_->deep_copy_from(first_page_writer.get_statistic()); } -int ChunkWriter::write_first_page_data(ByteStream &pages_data) { +int ChunkWriter::write_first_page_data(ByteStream &pages_data, bool with_statistic) { int ret = E_OK; - if (RET_FAIL(first_page_statistic_->serialize_to(pages_data))) { + if (with_statistic && RET_FAIL(first_page_statistic_->serialize_to(pages_data))) { } else if (RET_FAIL( pages_data.write_buf(first_page_data_.compressed_buf_, first_page_data_.compressed_size_))) { @@ -145,6 +145,13 @@ int ChunkWriter::end_encode_chunk() { chunk_header_.data_size_ = chunk_data_.total_size(); chunk_header_.num_of_pages_ = num_of_pages_; } + } else if (first_page_statistic_ != nullptr) { + ret = write_first_page_data(chunk_data_, false); + if (E_OK == ret) { + free_first_writer_data(); + chunk_header_.data_size_ = chunk_data_.total_size(); + chunk_header_.num_of_pages_ = num_of_pages_; + } } #if DEBUG_SE std::cout << "end_encode_chunk: num_of_pages_=" << num_of_pages_ diff --git a/cpp/src/writer/chunk_writer.h b/cpp/src/writer/chunk_writer.h index b0f1136b..6d80353f 100644 --- a/cpp/src/writer/chunk_writer.h +++ b/cpp/src/writer/chunk_writer.h @@ -119,12 +119,14 @@ class ChunkWriter { FORCE_INLINE void free_first_writer_data() { // free memory first_page_data_.destroy(); - StatisticFactory::free(first_page_statistic_); - first_page_statistic_ = nullptr; + if (first_page_statistic_ != nullptr) { + StatisticFactory::free(first_page_statistic_); + first_page_statistic_ = nullptr; + } } int seal_cur_page(bool end_chunk); void save_first_page_data(PageWriter &first_page_writer); - int write_first_page_data(common::ByteStream &pages_data); + int write_first_page_data(common::ByteStream &pages_data, bool with_statistic = true); private: common::TSDataType data_type_; diff --git a/cpp/src/writer/time_chunk_writer.cc b/cpp/src/writer/time_chunk_writer.cc index 565fbd3b..b65b856b 100644 --- a/cpp/src/writer/time_chunk_writer.cc +++ b/cpp/src/writer/time_chunk_writer.cc @@ -130,9 +130,9 @@ void TimeChunkWriter::save_first_page_data(TimePageWriter &first_page_writer) { first_page_statistic_->deep_copy_from(first_page_writer.get_statistic()); } -int TimeChunkWriter::write_first_page_data(ByteStream &pages_data) { +int TimeChunkWriter::write_first_page_data(ByteStream &pages_data, bool with_statistic) { int ret = E_OK; - if (RET_FAIL(first_page_statistic_->serialize_to(pages_data))) { + if (with_statistic && RET_FAIL(first_page_statistic_->serialize_to(pages_data))) { } else if (RET_FAIL( pages_data.write_buf(first_page_data_.compressed_buf_, first_page_data_.compressed_size_))) { @@ -148,6 +148,13 @@ int TimeChunkWriter::end_encode_chunk() { chunk_header_.data_size_ = chunk_data_.total_size(); chunk_header_.num_of_pages_ = num_of_pages_; } + } else if (first_page_statistic_ != nullptr) { + ret = write_first_page_data(chunk_data_, false); + if (E_OK == ret) { + free_first_writer_data(); + chunk_header_.data_size_ = chunk_data_.total_size(); + chunk_header_.num_of_pages_ = num_of_pages_; + } } #if DEBUG_SE std::cout << "end_encode_time_chunk: num_of_pages_=" << num_of_pages_ diff --git a/cpp/src/writer/time_chunk_writer.h b/cpp/src/writer/time_chunk_writer.h index 672a0fcf..d97a8aa9 100644 --- a/cpp/src/writer/time_chunk_writer.h +++ b/cpp/src/writer/time_chunk_writer.h @@ -84,12 +84,14 @@ class TimeChunkWriter { FORCE_INLINE void free_first_writer_data() { // free memory first_page_data_.destroy(); - StatisticFactory::free(first_page_statistic_); - first_page_statistic_ = nullptr; + if (first_page_statistic_ != nullptr) { + StatisticFactory::free(first_page_statistic_); + first_page_statistic_ = nullptr; + } } int seal_cur_page(bool end_chunk); void save_first_page_data(TimePageWriter &first_time_page_writer); - int write_first_page_data(common::ByteStream &pages_data); + int write_first_page_data(common::ByteStream &pages_data, bool with_statistic = true); private: TimePageWriter time_page_writer_; diff --git a/cpp/src/writer/value_chunk_writer.cc b/cpp/src/writer/value_chunk_writer.cc index 5cbdf7e0..6c23cdad 100644 --- a/cpp/src/writer/value_chunk_writer.cc +++ b/cpp/src/writer/value_chunk_writer.cc @@ -132,9 +132,9 @@ void ValueChunkWriter::save_first_page_data( first_page_statistic_->deep_copy_from(first_page_writer.get_statistic()); } -int ValueChunkWriter::write_first_page_data(ByteStream &pages_data) { +int ValueChunkWriter::write_first_page_data(ByteStream &pages_data, bool with_statistic) { int ret = E_OK; - if (RET_FAIL(first_page_statistic_->serialize_to(pages_data))) { + if (with_statistic && RET_FAIL(first_page_statistic_->serialize_to(pages_data))) { } else if (RET_FAIL( pages_data.write_buf(first_page_data_.compressed_buf_, first_page_data_.compressed_size_))) { @@ -150,6 +150,13 @@ int ValueChunkWriter::end_encode_chunk() { chunk_header_.data_size_ = chunk_data_.total_size(); chunk_header_.num_of_pages_ = num_of_pages_; } + } else if (first_page_statistic_ != nullptr) { + ret = write_first_page_data(chunk_data_, false); + if (E_OK == ret) { + free_first_writer_data(); + chunk_header_.data_size_ = chunk_data_.total_size(); + chunk_header_.num_of_pages_ = num_of_pages_; + } } #if DEBUG_SE std::cout << "end_encode_chunk: num_of_pages_=" << num_of_pages_ diff --git a/cpp/src/writer/value_chunk_writer.h b/cpp/src/writer/value_chunk_writer.h index 47c1c650..52581a34 100644 --- a/cpp/src/writer/value_chunk_writer.h +++ b/cpp/src/writer/value_chunk_writer.h @@ -105,12 +105,14 @@ class ValueChunkWriter { } FORCE_INLINE void free_first_writer_data() { first_page_data_.destroy(); - StatisticFactory::free(first_page_statistic_); - first_page_statistic_ = nullptr; + if (first_page_statistic_ != nullptr) { + StatisticFactory::free(first_page_statistic_); + first_page_statistic_ = nullptr; + } } int seal_cur_page(bool end_chunk); void save_first_page_data(ValuePageWriter &first_page_writer); - int write_first_page_data(common::ByteStream &pages_data); + int write_first_page_data(common::ByteStream &pages_data, bool with_statistic = true); private: common::TSDataType data_type_; diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc b/cpp/test/reader/table_view/tsfile_reader_table_test.cc index f28fdc16..1f37fb99 100644 --- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc +++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc @@ -212,14 +212,16 @@ TEST_F(TsFileTableReaderTest, TableModelResultMetadata) { tmp_result_set); auto* table_result_set = (TableResultSet*)tmp_result_set; auto result_set_metadata = table_result_set->get_metadata(); - ASSERT_EQ(result_set_metadata->get_column_count(), 10); - for (int i = 0; i < 5; i++) { - ASSERT_EQ(result_set_metadata->get_column_name(i), "id" + to_string(i)); + ASSERT_EQ(result_set_metadata->get_column_count(), 11); + ASSERT_EQ(result_set_metadata->get_column_name(1), "time"); + ASSERT_EQ(result_set_metadata->get_column_type(1), INT64); + for (int i = 2; i <= 6; i++) { + ASSERT_EQ(result_set_metadata->get_column_name(i), "id" + to_string(i-2)); ASSERT_EQ(result_set_metadata->get_column_type(i), TSDataType::STRING); } - for (int i = 5; i < 10; i++) { + for (int i = 7; i <= 11; i++) { ASSERT_EQ(result_set_metadata->get_column_name(i), - "s" + to_string(i - 5)); + "s" + to_string(i - 7)); ASSERT_EQ(result_set_metadata->get_column_type(i), TSDataType::INT64); } reader.destroy_query_data_set(table_result_set); diff --git a/cpp/test/reader/tsfile_reader_test.cc b/cpp/test/reader/tsfile_reader_test.cc index 9686f4df..c90ebb4e 100644 --- a/cpp/test/reader/tsfile_reader_test.cc +++ b/cpp/test/reader/tsfile_reader_test.cc @@ -143,8 +143,10 @@ TEST_F(TsFileReaderTest, ResultSetMetadata) { auto* qds = (QDSWithoutTimeGenerator*)tmp_qds; std::shared_ptr<ResultSetMetadata> result_set_metadata = qds->get_metadata(); - ASSERT_EQ(result_set_metadata->get_column_type(0), data_type); - ASSERT_EQ(result_set_metadata->get_column_name(0), + ASSERT_EQ(result_set_metadata->get_column_type(1), INT64); + ASSERT_EQ(result_set_metadata->get_column_name(1), "time"); + ASSERT_EQ(result_set_metadata->get_column_type(2), data_type); + ASSERT_EQ(result_set_metadata->get_column_name(2), device_path + "." + measurement_name); reader.destroy_query_data_set(qds); reader.close();
