This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 6073785a Fix/tree query and rle decoder (#764)
6073785a is described below
commit 6073785a40fdabec7d14c019bdbf815219ffda57
Author: Colin Lee <[email protected]>
AuthorDate: Thu Apr 2 16:16:33 2026 +0800
Fix/tree query and rle decoder (#764)
---
cpp/src/common/device_id.cc | 3 +-
cpp/src/encoding/dictionary_decoder.h | 3 +-
cpp/src/encoding/int32_rle_decoder.h | 83 ++++++++++---
cpp/src/encoding/int32_sprintz_decoder.h | 4 +-
cpp/src/encoding/int64_rle_decoder.h | 99 ++++++++++++----
cpp/src/encoding/int64_sprintz_decoder.h | 4 +-
cpp/src/file/tsfile_io_reader.cc | 7 +-
cpp/test/encoding/int32_rle_codec_test.cc | 129 +++++++++++++++++++++
.../reader/tree_view/tsfile_reader_tree_test.cc | 84 ++++++++++++++
9 files changed, 367 insertions(+), 49 deletions(-)
diff --git a/cpp/src/common/device_id.cc b/cpp/src/common/device_id.cc
index 02cd3251..b35a8593 100644
--- a/cpp/src/common/device_id.cc
+++ b/cpp/src/common/device_id.cc
@@ -209,11 +209,10 @@ std::vector<std::string>
StringArrayDeviceID::split_device_id_string(
const std::string& device_id_string) {
#ifdef ENABLE_ANTLR4
auto splits = storage::PathNodesGenerator::invokeParser(device_id_string);
- return split_device_id_string(splits);
#else
auto splits = split_string(device_id_string, '.');
- return split_device_id_string(splits);
#endif
+ return split_device_id_string(splits);
}
std::vector<std::string> StringArrayDeviceID::split_device_id_string(
diff --git a/cpp/src/encoding/dictionary_decoder.h b/cpp/src/encoding/dictionary_decoder.h
index 5f64b587..2962c66b 100644
--- a/cpp/src/encoding/dictionary_decoder.h
+++ b/cpp/src/encoding/dictionary_decoder.h
@@ -73,7 +73,8 @@ class DictionaryDecoder : public Decoder {
if (entry_index_.empty()) {
init_map(buffer);
}
- int code = value_decoder_.read_int(buffer);
+ int32_t code = 0;
+ value_decoder_.read_int(code, buffer);
return entry_index_[code];
}
diff --git a/cpp/src/encoding/int32_rle_decoder.h b/cpp/src/encoding/int32_rle_decoder.h
index aee9048a..ef7b6f09 100644
--- a/cpp/src/encoding/int32_rle_decoder.h
+++ b/cpp/src/encoding/int32_rle_decoder.h
@@ -37,6 +37,8 @@ class Int32RleDecoder : public Decoder {
int bitpacking_num_;
bool is_length_and_bitwidth_readed_;
int current_count_;
+ bool is_rle_run_;
+ int32_t rle_value_;
common::ByteStream byte_cache_{common::MOD_DECODER_OBJ};
int32_t* current_buffer_;
Int32Packer* packer_;
@@ -49,6 +51,8 @@ class Int32RleDecoder : public Decoder {
bitpacking_num_(0),
is_length_and_bitwidth_readed_(false),
current_count_(0),
+ is_rle_run_(false),
+ rle_value_(0),
byte_cache_(1024, common::MOD_DECODER_OBJ),
current_buffer_(nullptr),
packer_(nullptr),
@@ -60,13 +64,14 @@ class Int32RleDecoder : public Decoder {
}
int read_boolean(bool& ret_value, common::ByteStream& in) override {
int32_t bool_value;
- read_int32(bool_value, in);
- ret_value = bool_value == 0 ? false : true;
- return common::E_OK;
+ int ret = read_int32(bool_value, in);
+ if (ret == common::E_OK) {
+ ret_value = bool_value != 0;
+ }
+ return ret;
}
int read_int32(int32_t& ret_value, common::ByteStream& in) override {
- ret_value = static_cast<int32_t>(read_int(in));
- return common::E_OK;
+ return read_int(ret_value, in);
}
int read_int64(int64_t& ret_value, common::ByteStream& in) override {
return common::E_TYPE_NOT_MATCH;
@@ -89,6 +94,8 @@ class Int32RleDecoder : public Decoder {
bit_width_ = 0;
bitpacking_num_ = 0;
current_count_ = 0;
+ is_rle_run_ = false;
+ rle_value_ = 0;
}
bool has_next(common::ByteStream& buffer) {
@@ -103,30 +110,69 @@ class Int32RleDecoder : public Decoder {
return current_count_ > 0 || byte_cache_.remaining_size() > 0;
}
- int32_t read_int(common::ByteStream& buffer) {
+ int read_int(int32_t& result, common::ByteStream& buffer) {
+ int ret = common::E_OK;
if (!is_length_and_bitwidth_readed_) {
// start to reader a new rle+bit-packing pattern
- read_length_and_bitwidth(buffer);
+ if (RET_FAIL(read_length_and_bitwidth(buffer))) {
+ return ret;
+ }
}
if (current_count_ == 0) {
- uint8_t header;
- int ret = common::E_OK;
- if (RET_FAIL(
- common::SerializationUtil::read_ui8(header, byte_cache_)))
{
+ // The header is encoded as an unsigned varint where:
+ // low bit = 0 => RLE run: header_value >> 1 is the run count
+ // low bit = 1 => bit-packing: header_value >> 1 is the
+ // group count
+ uint32_t header_value = 0;
+ if (RET_FAIL(common::SerializationUtil::read_var_uint(
+ header_value, byte_cache_))) {
return ret;
}
- call_read_bit_packing_buffer(header);
+ if (header_value & 1) {
+ if (RET_FAIL(call_read_bit_packing_buffer(header_value))) {
+ return ret;
+ }
+ } else {
+ if (RET_FAIL(call_read_rle_run(header_value))) {
+ return ret;
+ }
+ }
}
--current_count_;
- int32_t result = current_buffer_[bitpacking_num_ - current_count_ - 1];
+ result = is_rle_run_
+ ? rle_value_
+ : current_buffer_[bitpacking_num_ - current_count_ - 1];
if (!has_next_package()) {
is_length_and_bitwidth_readed_ = false;
}
- return result;
+ return ret;
}
- int call_read_bit_packing_buffer(uint8_t header) {
- int bit_packed_group_count = (int)(header >> 1);
+ int call_read_rle_run(uint32_t header_value) {
+ int ret = common::E_OK;
+ int run_length = (int)(header_value >> 1);
+ if (run_length <= 0) {
+ return common::E_DECODE_ERR;
+ }
+ int byte_width = (bit_width_ + 7) / 8;
+ // Read the repeated value (stored as byte_width bytes, little-endian)
+ int32_t value = 0;
+ for (int i = 0; i < byte_width; i++) {
+ uint8_t b;
+ if (RET_FAIL(common::SerializationUtil::read_ui8(b, byte_cache_)))
{
+ return ret;
+ }
+ value |= ((int32_t)b) << (i * 8);
+ }
+ rle_value_ = value;
+ is_rle_run_ = true;
+ current_count_ = run_length;
+ bitpacking_num_ = run_length;
+ return ret;
+ }
+
+ int call_read_bit_packing_buffer(uint32_t header_value) {
+ int bit_packed_group_count = (int)(header_value >> 1);
// in last bit-packing group, there may be some useless value,
// lastBitPackedNum indicates how many values is useful
uint8_t last_bit_packed_num;
@@ -139,6 +185,7 @@ class Int32RleDecoder : public Decoder {
current_count_ =
(bit_packed_group_count - 1) * 8 + last_bit_packed_num;
bitpacking_num_ = current_count_;
+ is_rle_run_ = false;
} else {
return common::E_DECODE_ERR;
}
@@ -236,8 +283,10 @@ class Int32RleDecoder : public Decoder {
bitpacking_num_ = 0;
is_length_and_bitwidth_readed_ = false;
current_count_ = 0;
+ is_rle_run_ = false;
+ rle_value_ = 0;
if (current_buffer_) {
- delete[] current_buffer_;
+ common::mem_free(current_buffer_);
current_buffer_ = nullptr;
}
if (packer_) {
diff --git a/cpp/src/encoding/int32_sprintz_decoder.h b/cpp/src/encoding/int32_sprintz_decoder.h
index 3d15597e..a7c92eed 100644
--- a/cpp/src/encoding/int32_sprintz_decoder.h
+++ b/cpp/src/encoding/int32_sprintz_decoder.h
@@ -125,7 +125,9 @@ class Int32SprintzDecoder : public SprintzDecoder {
decode_size_ = bit_width_ & ~(1 << 7);
Int32RleDecoder decoder;
for (int i = 0; i < decode_size_; ++i) {
- current_buffer_[i] = decoder.read_int(input);
+ if (RET_FAIL(decoder.read_int(current_buffer_[i], input))) {
+ return ret;
+ }
}
} else {
decode_size_ = block_size_ + 1;
diff --git a/cpp/src/encoding/int64_rle_decoder.h b/cpp/src/encoding/int64_rle_decoder.h
index 8010fe0f..df8e1783 100644
--- a/cpp/src/encoding/int64_rle_decoder.h
+++ b/cpp/src/encoding/int64_rle_decoder.h
@@ -37,6 +37,8 @@ class Int64RleDecoder : public Decoder {
int bitpacking_num_;
bool is_length_and_bitwidth_readed_;
int current_count_;
+ bool is_rle_run_;
+ int64_t rle_value_;
common::ByteStream byte_cache_{common::MOD_DECODER_OBJ};
int64_t* current_buffer_;
Int64Packer* packer_;
@@ -49,6 +51,8 @@ class Int64RleDecoder : public Decoder {
bitpacking_num_(0),
is_length_and_bitwidth_readed_(false),
current_count_(0),
+ is_rle_run_(false),
+ rle_value_(0),
byte_cache_(1024, common::MOD_DECODER_OBJ),
current_buffer_(nullptr),
packer_(nullptr),
@@ -65,8 +69,7 @@ class Int64RleDecoder : public Decoder {
return common::E_TYPE_NOT_MATCH;
}
int read_int64(int64_t& ret_value, common::ByteStream& in) override {
- ret_value = read_int(in);
- return common::E_OK;
+ return read_int(ret_value, in);
}
int read_float(float& ret_value, common::ByteStream& in) override {
return common::E_TYPE_NOT_MATCH;
@@ -86,6 +89,8 @@ class Int64RleDecoder : public Decoder {
bit_width_ = 0;
bitpacking_num_ = 0;
current_count_ = 0;
+ is_rle_run_ = false;
+ rle_value_ = 0;
}
bool has_next(common::ByteStream& buffer) {
@@ -100,30 +105,69 @@ class Int64RleDecoder : public Decoder {
return current_count_ > 0 || byte_cache_.remaining_size() > 0;
}
- int64_t read_int(common::ByteStream& buffer) {
+ int read_int(int64_t& result, common::ByteStream& buffer) {
+ int ret = common::E_OK;
if (!is_length_and_bitwidth_readed_) {
// start to reader a new rle+bit-packing pattern
- read_length_and_bitwidth(buffer);
+ if (RET_FAIL(read_length_and_bitwidth(buffer))) {
+ return ret;
+ }
}
if (current_count_ == 0) {
- uint8_t header;
- int ret = common::E_OK;
- if (RET_FAIL(
- common::SerializationUtil::read_ui8(header, byte_cache_)))
{
+ // The header is encoded as an unsigned varint where:
+ // low bit = 0 => RLE run: header_value >> 1 is the run count
+ // low bit = 1 => bit-packing: header_value >> 1 is the
+ // group count
+ uint32_t header_value = 0;
+ if (RET_FAIL(common::SerializationUtil::read_var_uint(
+ header_value, byte_cache_))) {
return ret;
}
- call_read_bit_packing_buffer(header);
+ if (header_value & 1) {
+ if (RET_FAIL(call_read_bit_packing_buffer(header_value))) {
+ return ret;
+ }
+ } else {
+ if (RET_FAIL(call_read_rle_run(header_value))) {
+ return ret;
+ }
+ }
}
--current_count_;
- int64_t result = current_buffer_[bitpacking_num_ - current_count_ - 1];
+ result = is_rle_run_
+ ? rle_value_
+ : current_buffer_[bitpacking_num_ - current_count_ - 1];
if (!has_next_package()) {
is_length_and_bitwidth_readed_ = false;
}
- return result;
+ return ret;
}
- int call_read_bit_packing_buffer(uint8_t header) {
- int bit_packed_group_count = (int)(header >> 1);
+ int call_read_rle_run(uint32_t header_value) {
+ int ret = common::E_OK;
+ int run_length = (int)(header_value >> 1);
+ if (run_length <= 0) {
+ return common::E_DECODE_ERR;
+ }
+ int byte_width = (bit_width_ + 7) / 8;
+ // Read the repeated value (stored as byte_width bytes, little-endian)
+ int64_t value = 0;
+ for (int i = 0; i < byte_width; i++) {
+ uint8_t b;
+ if (RET_FAIL(common::SerializationUtil::read_ui8(b, byte_cache_)))
{
+ return ret;
+ }
+ value |= ((int64_t)b) << (i * 8);
+ }
+ rle_value_ = value;
+ is_rle_run_ = true;
+ current_count_ = run_length;
+ bitpacking_num_ = run_length;
+ return ret;
+ }
+
+ int call_read_bit_packing_buffer(uint32_t header_value) {
+ int bit_packed_group_count = (int)(header_value >> 1);
// in last bit-packing group, there may be some useless value,
// lastBitPackedNum indicates how many values is useful
uint8_t last_bit_packed_num;
@@ -136,25 +180,27 @@ class Int64RleDecoder : public Decoder {
current_count_ =
(bit_packed_group_count - 1) * 8 + last_bit_packed_num;
bitpacking_num_ = current_count_;
+ is_rle_run_ = false;
} else {
- printf(
- "tsfile-encoding IntRleDecoder: bit_packed_group_count %d, "
- "smaller "
- "than 1",
- bit_packed_group_count);
+ return common::E_DECODE_ERR;
}
- read_bit_packing_buffer(bit_packed_group_count, last_bit_packed_num);
+ ret = read_bit_packing_buffer(bit_packed_group_count,
+ last_bit_packed_num);
return ret;
}
- void read_bit_packing_buffer(int bit_packed_group_count,
- int last_bit_packed_num) {
+ int read_bit_packing_buffer(int bit_packed_group_count,
+ int last_bit_packed_num) {
+ int ret = common::E_OK;
if (current_buffer_ != nullptr) {
common::mem_free(current_buffer_);
}
current_buffer_ = static_cast<int64_t*>(
common::mem_alloc(sizeof(int64_t) * bit_packed_group_count * 8,
common::MOD_DECODER_OBJ));
+ if (IS_NULL(current_buffer_)) {
+ return common::E_OOM;
+ }
int bytes_to_read = bit_packed_group_count * bit_width_;
if (bytes_to_read > (int)byte_cache_.remaining_size()) {
bytes_to_read = byte_cache_.remaining_size();
@@ -162,13 +208,17 @@ class Int64RleDecoder : public Decoder {
std::vector<unsigned char> bytes(bytes_to_read);
for (int i = 0; i < bytes_to_read; i++) {
- common::SerializationUtil::read_ui8(bytes[i], byte_cache_);
+ if (RET_FAIL(common::SerializationUtil::read_ui8(bytes[i],
+ byte_cache_))) {
+ return ret;
+ }
}
// save all int values in currentBuffer
packer_->unpack_all_values(
bytes.data(), bytes_to_read,
current_buffer_); // decode from bytes, save in currentBuffer
+ return ret;
}
int read_length_and_bitwidth(common::ByteStream& buffer) {
@@ -177,6 +227,9 @@ class Int64RleDecoder : public Decoder {
common::SerializationUtil::read_var_uint(length_, buffer))) {
return common::E_PARTIAL_READ;
} else {
+ if (tmp_buf_) {
+ common::mem_free(tmp_buf_);
+ }
tmp_buf_ =
(uint8_t*)common::mem_alloc(length_, common::MOD_DECODER_OBJ);
if (tmp_buf_ == nullptr) {
@@ -222,6 +275,8 @@ class Int64RleDecoder : public Decoder {
bitpacking_num_ = 0;
is_length_and_bitwidth_readed_ = false;
current_count_ = 0;
+ is_rle_run_ = false;
+ rle_value_ = 0;
if (current_buffer_) {
common::mem_free(current_buffer_);
current_buffer_ = nullptr;
diff --git a/cpp/src/encoding/int64_sprintz_decoder.h b/cpp/src/encoding/int64_sprintz_decoder.h
index a7e3fdd2..7b082768 100644
--- a/cpp/src/encoding/int64_sprintz_decoder.h
+++ b/cpp/src/encoding/int64_sprintz_decoder.h
@@ -124,7 +124,9 @@ class Int64SprintzDecoder : public SprintzDecoder {
decode_size_ = bit_width_ & ~(1 << 7);
Int64RleDecoder decoder;
for (int i = 0; i < decode_size_; ++i) {
- current_buffer_[i] = decoder.read_int(input);
+ if (RET_FAIL(decoder.read_int(current_buffer_[i], input))) {
+ return ret;
+ }
}
} else {
decode_size_ = block_size_ + 1;
diff --git a/cpp/src/file/tsfile_io_reader.cc b/cpp/src/file/tsfile_io_reader.cc
index 69e12e45..31d4a303 100644
--- a/cpp/src/file/tsfile_io_reader.cc
+++ b/cpp/src/file/tsfile_io_reader.cc
@@ -300,15 +300,12 @@ int TsFileIOReader::load_device_index_entry(
}
std::string table_name =
device_id_comparable->device_id_->get_table_name();
auto it = tsfile_meta_.table_metadata_index_node_map_.find(table_name);
- if (it == tsfile_meta_.table_metadata_index_node_map_.end()) {
+ if (it == tsfile_meta_.table_metadata_index_node_map_.end() ||
+ it->second == nullptr) {
return E_DEVICE_NOT_EXIST;
}
auto index_node = it->second;
- if (index_node == nullptr) {
- return E_DEVICE_NOT_EXIST;
- }
if (index_node->node_type_ == LEAF_DEVICE) {
- // FIXME
ret = index_node->binary_search_children(
device_name, true, device_index_entry, end_offset);
} else {
diff --git a/cpp/test/encoding/int32_rle_codec_test.cc b/cpp/test/encoding/int32_rle_codec_test.cc
index c580a0eb..dfc737c8 100644
--- a/cpp/test/encoding/int32_rle_codec_test.cc
+++ b/cpp/test/encoding/int32_rle_codec_test.cc
@@ -164,4 +164,133 @@ TEST_F(Int32RleEncoderTest, EncodeFlushWithoutData) {
EXPECT_EQ(stream.total_size(), 0u);
}
+// Helper: write a manually crafted RLE segment (Java/Parquet hybrid RLE
+// format):
+// [length_varint] [bit_width] [group_header_varint] [value_bytes...]
+// run_count must be the actual count (written as (run_count<<1)|0 varint).
+static void write_rle_segment(common::ByteStream& stream, uint8_t bit_width,
+ uint32_t run_count, int32_t value) {
+ common::ByteStream content(32, common::MOD_ENCODER_OBJ);
+ common::SerializationUtil::write_ui8(bit_width, content);
+ // Group header: (run_count << 1) | 0 = even varint
+ common::SerializationUtil::write_var_uint(run_count << 1, content);
+ // Value: ceil(bit_width / 8) bytes, little-endian
+ int byte_width = (bit_width + 7) / 8;
+ uint32_t uvalue = static_cast<uint32_t>(value);
+ for (int i = 0; i < byte_width; i++) {
+ common::SerializationUtil::write_ui8((uvalue >> (i * 8)) & 0xFF,
+ content);
+ }
+ uint32_t length = content.total_size();
+ common::SerializationUtil::write_var_uint(length, stream);
+ // Append content bytes to stream
+ uint8_t buf[64];
+ uint32_t read_len = 0;
+ content.read_buf(buf, length, read_len);
+ stream.write_buf(buf, read_len);
+}
+
+// Regression test: run_count=64 requires a 2-byte LEB128 varint header
+// ((64<<1)|0 = 128 = [0x80, 0x01]). Before the fix, only 1 byte was read,
+// causing byte misalignment and incorrect decoding.
+TEST_F(Int32RleEncoderTest, DecodeRleRunCountExactly64) {
+ common::ByteStream stream(32, common::MOD_ENCODER_OBJ);
+ write_rle_segment(stream, /*bit_width=*/7, /*run_count=*/64,
+ /*value=*/42);
+
+ Int32RleDecoder decoder;
+ std::vector<int32_t> decoded;
+ while (decoder.has_next(stream)) {
+ int32_t v;
+ decoder.read_int32(v, stream);
+ decoded.push_back(v);
+ }
+
+ ASSERT_EQ(decoded.size(), 64u);
+ for (int32_t v : decoded) {
+ EXPECT_EQ(v, 42);
+ }
+}
+
+// Run counts of 128, 256 and 500 each need a 2-byte varint header.
+TEST_F(Int32RleEncoderTest, DecodeRleRunCountLarge) {
+ for (uint32_t count : {128u, 256u, 500u}) {
+ common::ByteStream stream(64, common::MOD_ENCODER_OBJ);
+ write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/count,
+ /*value=*/100);
+
+ Int32RleDecoder decoder;
+ std::vector<int32_t> decoded;
+ while (decoder.has_next(stream)) {
+ int32_t v;
+ decoder.read_int32(v, stream);
+ decoded.push_back(v);
+ }
+
+ ASSERT_EQ(decoded.size(), (size_t)count)
+ << "Failed for run_count=" << count;
+ for (int32_t v : decoded) {
+ EXPECT_EQ(v, 100);
+ }
+ }
+}
+
+// Multiple consecutive RLE runs including large ones (simulates real sensor
+// data with repeated values and occasional changes).
+TEST_F(Int32RleEncoderTest, DecodeMultipleRleRunsWithLargeCount) {
+ common::ByteStream stream(128, common::MOD_ENCODER_OBJ);
+ write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/64,
+ /*value=*/25);
+ write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/8,
+ /*value=*/26);
+ write_rle_segment(stream, /*bit_width=*/8, /*run_count=*/100,
+ /*value=*/25);
+
+ Int32RleDecoder decoder;
+ std::vector<int32_t> decoded;
+ while (decoder.has_next(stream)) {
+ int32_t v;
+ decoder.read_int32(v, stream);
+ decoded.push_back(v);
+ }
+
+ ASSERT_EQ(decoded.size(), 172u); // 64 + 8 + 100
+ for (size_t i = 0; i < 64; i++) EXPECT_EQ(decoded[i], 25);
+ for (size_t i = 64; i < 72; i++) EXPECT_EQ(decoded[i], 26);
+ for (size_t i = 72; i < 172; i++) EXPECT_EQ(decoded[i], 25);
+}
+
+// Regression test: Int32RleDecoder::reset() previously called delete[] on
+// current_buffer_ which was allocated with mem_alloc (malloc). This is
+// undefined behaviour and typically causes a crash. The fix uses mem_free.
+TEST_F(Int32RleEncoderTest, ResetAfterDecodeNoCrash) {
+ common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
+ Int32RleEncoder encoder;
+ for (int i = 0; i < 16; i++) encoder.encode(i, stream);
+ encoder.flush(stream);
+
+ Int32RleDecoder decoder;
+ // Decode at least one value to populate current_buffer_ via mem_alloc.
+ int32_t v;
+ ASSERT_TRUE(decoder.has_next(stream));
+ decoder.read_int32(v, stream);
+
+ // reset() must use mem_free, not delete[]. Before the fix this would crash.
+ decoder.reset();
+
+ // Verify the decoder is functional after reset.
+ common::ByteStream stream2(1024, common::MOD_ENCODER_OBJ);
+ Int32RleEncoder encoder2;
+ std::vector<int32_t> input = {7, 7, 7, 7, 7, 7, 7, 7};
+ for (int32_t x : input) encoder2.encode(x, stream2);
+ encoder2.flush(stream2);
+
+ std::vector<int32_t> decoded;
+ while (decoder.has_next(stream2)) {
+ decoder.read_int32(v, stream2);
+ decoded.push_back(v);
+ }
+ ASSERT_EQ(decoded, input);
+}
+
} // namespace storage
diff --git a/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc b/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc
index aa4ff254..8181b613 100644
--- a/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc
+++ b/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc
@@ -24,6 +24,7 @@
#include "common/schema.h"
#include "common/tablet.h"
#include "file/write_file.h"
+#include "reader/result_set.h"
#include "reader/tsfile_reader.h"
#include "reader/tsfile_tree_reader.h"
#include "writer/tsfile_table_writer.h"
@@ -425,3 +426,86 @@ TEST_F(TsFileTreeReaderTest, ExtendedRowsAndColumnsTest) {
delete measurement;
}
}
+
+// Regression test: query_table_on_tree on a device path with three or more
+// dot-segments (e.g. "root.sensors.TH") previously SEGVed because:
+// 1. StringArrayDeviceID split "root.sensors.TH" into ["root","sensors","TH"]
+// instead of the correct ["root.sensors","TH"], so get_table_name() returned
+// "root" instead of "root.sensors".
+// 2. load_device_index_entry used operator[] on the table map which inserted a
+// null entry, then asserted on it.
+TEST_F(TsFileTreeReaderTest, QueryTableOnTreeDeepDevicePath) {
+ TsFileTreeWriter writer(&write_file_);
+ // Device paths with 3 dot-segments: table_name="root.sensors", device="TH"
+ std::string device_id = "root.sensors.TH";
+ std::string m_temp = "temperature";
+ std::string m_humi = "humidity";
+ auto* ms_temp = new MeasurementSchema(m_temp, INT32);
+ auto* ms_humi = new MeasurementSchema(m_humi, INT32);
+ ASSERT_EQ(E_OK, writer.register_timeseries(device_id, ms_temp));
+ ASSERT_EQ(E_OK, writer.register_timeseries(device_id, ms_humi));
+ delete ms_temp;
+ delete ms_humi;
+
+ for (int ts = 0; ts < 5; ts++) {
+ TsRecord rec(device_id, ts);
+ rec.add_point(m_temp, static_cast<int32_t>(20 + ts));
+ rec.add_point(m_humi, static_cast<int32_t>(50 + ts));
+ ASSERT_EQ(E_OK, writer.write(rec));
+ }
+ writer.flush();
+ writer.close();
+
+ TsFileReader reader;
+ ASSERT_EQ(E_OK, reader.open(file_name_));
+ ResultSet* result;
+ // query_table_on_tree used to SEGV here due to wrong table-name lookup
+ ASSERT_EQ(E_OK, reader.query_table_on_tree({m_temp, m_humi}, INT64_MIN,
+ INT64_MAX, result));
+
+ auto* trs = static_cast<storage::TableResultSet*>(result);
+ bool has_next = false;
+ int row_cnt = 0;
+ while (IS_SUCC(trs->next(has_next)) && has_next) {
+ row_cnt++;
+ }
+ EXPECT_EQ(row_cnt, 5);
+ reader.destroy_query_data_set(result);
+ reader.close();
+}
+
+// Regression test: load_device_index_entry previously used operator[] to look
+// up the table node, which silently inserted a null entry and then asserted.
+// After the fix it uses find() and returns E_DEVICE_NOT_EXIST gracefully.
+// This is triggered when querying a measurement that no device in the file has.
+TEST_F(TsFileTreeReaderTest, QueryTableOnTreeMissingMeasurement) {
+ // Use the same multi-device setup as ReadTreeByTable to ensure a valid
+ // file.
+ TsFileTreeWriter writer(&write_file_);
+ std::vector<std::string> device_ids = {"root.db1.t1", "root.db2.t1"};
+ std::string m_temp = "temperature";
+ for (auto dev : device_ids) {
+ auto* ms = new MeasurementSchema(m_temp, INT32);
+ ASSERT_EQ(E_OK, writer.register_timeseries(dev, ms));
+ delete ms;
+ TsRecord rec(dev, 0);
+ rec.add_point(m_temp, static_cast<int32_t>(25));
+ ASSERT_EQ(E_OK, writer.write(rec));
+ }
+ writer.flush();
+ writer.close();
+
+ TsFileReader reader;
+ ASSERT_EQ(E_OK, reader.open(file_name_));
+ ResultSet* result = nullptr;
+ // "nonexistent" is not present in any device. Before the fix,
+ // load_device_index_entry used operator[] which inserted null and crashed.
+ // After the fix it returns E_DEVICE_NOT_EXIST or E_COLUMN_NOT_EXIST.
+ int ret = reader.query_table_on_tree({"nonexistent"}, INT64_MIN, INT64_MAX,
+ result);
+ EXPECT_NE(ret, E_OK); // Must not succeed (measurement not found)
+ if (result != nullptr) {
+ reader.destroy_query_data_set(result);
+ }
+ reader.close();
+}