This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new a55f058d fix get_metadata datatype (#781)
a55f058d is described below
commit a55f058d916ea729c65e43556ffc3334030f03c6
Author: Hongzhi Gao <[email protected]>
AuthorDate: Wed Apr 15 10:31:04 2026 +0800
fix get_metadata datatype (#781)
* fix get_metadata datatype
* add it
---
cpp/src/common/tsfile_common.h | 5 +-
.../reader/block/single_device_tsblock_reader.cc | 6 +-
cpp/src/reader/tsfile_series_scan_iterator.cc | 2 +-
cpp/test/reader/tsfile_reader_test.cc | 132 +++++++++++++++++++++
4 files changed, 140 insertions(+), 5 deletions(-)
diff --git a/cpp/src/common/tsfile_common.h b/cpp/src/common/tsfile_common.h
index c866e499..36d2b251 100644
--- a/cpp/src/common/tsfile_common.h
+++ b/cpp/src/common/tsfile_common.h
@@ -321,6 +321,7 @@ class ITimeseriesIndex {
virtual common::TSDataType get_data_type() const {
return common::INVALID_DATATYPE;
}
+ virtual bool is_aligned() const { return false; }
virtual Statistic* get_statistic() const { return nullptr; }
};
@@ -589,8 +590,10 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex {
return value_ts_idx_->get_measurement_name();
}
virtual common::TSDataType get_data_type() const {
- return time_ts_idx_->get_data_type();
+ return value_ts_idx_ == nullptr ? common::INVALID_DATATYPE
+ : value_ts_idx_->get_data_type();
}
+ virtual bool is_aligned() const { return true; }
virtual Statistic* get_statistic() const {
return value_ts_idx_->get_statistic();
}
diff --git a/cpp/src/reader/block/single_device_tsblock_reader.cc
b/cpp/src/reader/block/single_device_tsblock_reader.cc
index 9b21e41b..93f42efd 100644
--- a/cpp/src/reader/block/single_device_tsblock_reader.cc
+++ b/cpp/src/reader/block/single_device_tsblock_reader.cc
@@ -63,7 +63,7 @@ int32_t SingleDeviceTsBlockReader::compute_dense_row_count(
int64_t time_count = 0;
int64_t value_count = 0;
- if (ts_index->get_data_type() == common::VECTOR) {
+ if (ts_index->is_aligned()) {
auto* time_list = ts_index->get_time_chunk_meta_list();
auto* value_list = ts_index->get_value_chunk_meta_list();
if (time_list == nullptr || value_list == nullptr) {
@@ -427,9 +427,9 @@ int SingleDeviceTsBlockReader::construct_column_context(
int ssi_offset, int ssi_limit) {
int ret = common::E_OK;
if (time_series_index == nullptr ||
- (time_series_index->get_data_type() != common::TSDataType::VECTOR &&
+ (!time_series_index->is_aligned() &&
time_series_index->get_chunk_meta_list()->empty())) {
- } else if (time_series_index->get_data_type() == common::VECTOR) {
+ } else if (time_series_index->is_aligned()) {
const AlignedTimeseriesIndex* aligned_time_series_index =
dynamic_cast<const AlignedTimeseriesIndex*>(time_series_index);
if (aligned_time_series_index == nullptr) {
diff --git a/cpp/src/reader/tsfile_series_scan_iterator.cc
b/cpp/src/reader/tsfile_series_scan_iterator.cc
index 5add1e9a..f53e88e7 100644
--- a/cpp/src/reader/tsfile_series_scan_iterator.cc
+++ b/cpp/src/reader/tsfile_series_scan_iterator.cc
@@ -171,7 +171,7 @@ void TsFileSeriesScanIterator::revert_tsblock() {
int TsFileSeriesScanIterator::init_chunk_reader() {
int ret = E_OK;
- is_aligned_ = itimeseries_index_->get_data_type() == common::VECTOR;
+ is_aligned_ = itimeseries_index_->is_aligned();
if (!is_aligned_) {
void* buf =
common::mem_alloc(sizeof(ChunkReader), common::MOD_CHUNK_READER);
diff --git a/cpp/test/reader/tsfile_reader_test.cc
b/cpp/test/reader/tsfile_reader_test.cc
index 54127e07..45261cf4 100644
--- a/cpp/test/reader/tsfile_reader_test.cc
+++ b/cpp/test/reader/tsfile_reader_test.cc
@@ -21,7 +21,9 @@
#include <gtest/gtest.h>
#include <sys/stat.h>
+#include <map>
#include <random>
+#include <unordered_map>
#include <vector>
#include "common/record.h"
@@ -264,6 +266,136 @@ TEST_F(TsFileReaderTest, GetTimeseriesSchema) {
reader.close();
}
+TEST_F(TsFileReaderTest, GetTimeseriesMetadataTableModelTypeAndDeviceFilter) {
+ std::vector<MeasurementSchema*> measurement_schemas = {
+ new MeasurementSchema("deviceid1", TSDataType::STRING),
+ new MeasurementSchema("deviceid2", TSDataType::STRING),
+ new MeasurementSchema("temperature", TSDataType::FLOAT),
+ new MeasurementSchema("pressure", TSDataType::DOUBLE),
+ new MeasurementSchema("humidity", TSDataType::INT32)};
+ std::vector<ColumnCategory> column_categories = {
+ ColumnCategory::TAG, ColumnCategory::TAG, ColumnCategory::FIELD,
+ ColumnCategory::FIELD, ColumnCategory::FIELD};
+ auto table_schema = std::make_shared<TableSchema>(
+ "testtable", measurement_schemas, column_categories);
+
+ ASSERT_EQ(tsfile_writer_->register_table(table_schema), E_OK);
+
+ Tablet tablet(table_schema->get_table_name(),
+ table_schema->get_measurement_names(),
+ table_schema->get_data_types(),
+ table_schema->get_column_categories(), 10);
+ for (int row = 0; row < 5; row++) {
+ ASSERT_EQ(tablet.add_timestamp(row, row), E_OK);
+ ASSERT_EQ(tablet.add_value(row, "deviceid1", "device_a"), E_OK);
+ ASSERT_EQ(tablet.add_value(row, "deviceid2", "device_b"), E_OK);
+ ASSERT_EQ(tablet.add_value(row, "temperature",
static_cast<float>(row)),
+ E_OK);
+ ASSERT_EQ(tablet.add_value(row, "pressure", static_cast<double>(row)),
+ E_OK);
+ ASSERT_EQ(tablet.add_value(row, "humidity", static_cast<int32_t>(row)),
+ E_OK);
+ }
+ for (int row = 5; row < 10; row++) {
+ ASSERT_EQ(tablet.add_timestamp(row, row), E_OK);
+ ASSERT_EQ(tablet.add_value(row, "deviceid1", "device_b"), E_OK);
+ ASSERT_EQ(tablet.add_value(row, "deviceid2", "device_a"), E_OK);
+ ASSERT_EQ(tablet.add_value(row, "temperature",
static_cast<float>(row)),
+ E_OK);
+ ASSERT_EQ(tablet.add_value(row, "pressure", static_cast<double>(row)),
+ E_OK);
+ ASSERT_EQ(tablet.add_value(row, "humidity", static_cast<int32_t>(row)),
+ E_OK);
+ }
+
+ // Append one row whose middle TAG segment is null.
+ Tablet null_tag_tablet(table_schema->get_table_name(),
+ table_schema->get_measurement_names(),
+ table_schema->get_data_types(),
+ table_schema->get_column_categories(), 1);
+ int64_t null_tag_ts[1] = {10};
+ int32_t null_tag_humidity[1] = {10};
+ float null_tag_temperature[1] = {10.0F};
+ double null_tag_pressure[1] = {10.0};
+ // deviceid1 = null
+ int32_t id1_offsets[2] = {0, 0};
+ uint8_t id1_bitmap[1] = {0x01}; // row0 is null
+ // deviceid2 = "device_b"
+ int32_t id2_offsets[2] = {0, 8};
+ const char id2_data[] = "device_b";
+ ASSERT_EQ(null_tag_tablet.set_timestamps(null_tag_ts, 1), E_OK);
+ ASSERT_EQ(null_tag_tablet.set_column_string_values(0, id1_offsets, "",
+ id1_bitmap, 1),
+ E_OK);
+ ASSERT_EQ(null_tag_tablet.set_column_string_values(1, id2_offsets,
id2_data,
+ nullptr, 1),
+ E_OK);
+ ASSERT_EQ(
+ null_tag_tablet.set_column_values(2, null_tag_temperature, nullptr, 1),
+ E_OK);
+ ASSERT_EQ(
+ null_tag_tablet.set_column_values(3, null_tag_pressure, nullptr, 1),
+ E_OK);
+ ASSERT_EQ(
+ null_tag_tablet.set_column_values(4, null_tag_humidity, nullptr, 1),
+ E_OK);
+
+ ASSERT_EQ(tsfile_writer_->write_table(tablet), E_OK);
+ ASSERT_EQ(tsfile_writer_->write_table(null_tag_tablet), E_OK);
+ ASSERT_EQ(tsfile_writer_->flush(), E_OK);
+ ASSERT_EQ(tsfile_writer_->close(), E_OK);
+
+ storage::TsFileReader reader;
+ ASSERT_EQ(reader.open(file_name_), common::E_OK);
+
+ auto all_meta = reader.get_timeseries_metadata();
+ ASSERT_EQ(all_meta.size(), 3u);
+
+ std::vector<std::string> selected_device_segments = {
+ "testtable", "device_a", "device_b"};
+ std::vector<std::shared_ptr<IDeviceID>> selected_devices = {
+ std::make_shared<StringArrayDeviceID>(selected_device_segments)};
+ auto selected_meta = reader.get_timeseries_metadata(selected_devices);
+ ASSERT_EQ(selected_meta.size(), 1u);
+
+ auto selected_list = selected_meta.begin()->second;
+ std::unordered_map<std::string, TSDataType> type_by_measurement;
+ for (const auto& index : selected_list) {
+ type_by_measurement[index->get_measurement_name().to_std_string()] =
+ index->get_data_type();
+ }
+ ASSERT_EQ(type_by_measurement.at("temperature"), TSDataType::FLOAT);
+ ASSERT_EQ(type_by_measurement.at("pressure"), TSDataType::DOUBLE);
+ ASSERT_EQ(type_by_measurement.at("humidity"), TSDataType::INT32);
+
+ // Query metadata for the device with null middle TAG segment.
+ std::vector<std::string*> null_seg_device = {
+ new std::string("testtable"), nullptr, new std::string("device_b")};
+ std::vector<std::shared_ptr<IDeviceID>> null_seg_devices = {
+ std::make_shared<StringArrayDeviceID>(null_seg_device)};
+ for (auto* seg : null_seg_device) {
+ if (seg != nullptr) {
+ delete seg;
+ }
+ }
+ auto null_seg_meta = reader.get_timeseries_metadata(null_seg_devices);
+ ASSERT_EQ(null_seg_meta.size(), 1u);
+ auto null_seg_list = null_seg_meta.begin()->second;
+ ASSERT_EQ(null_seg_list.size(), 3u);
+ std::unordered_map<std::string, TSDataType> null_seg_type_by_measurement;
+ for (const auto& index : null_seg_list) {
+ null_seg_type_by_measurement[index->get_measurement_name()
+ .to_std_string()] =
+ index->get_data_type();
+ }
+ ASSERT_EQ(null_seg_type_by_measurement.at("temperature"),
+ TSDataType::FLOAT);
+ ASSERT_EQ(null_seg_type_by_measurement.at("pressure"), TSDataType::DOUBLE);
+ ASSERT_EQ(null_seg_type_by_measurement.at("humidity"), TSDataType::INT32);
+
+ reader.close();
+}
+
static const int64_t kLargeFileNumRecords = 300000000;
static const int64_t kLargeFileFlushBatch = 100000;