Copilot commented on code in PR #815:
URL: https://github.com/apache/tsfile/pull/815#discussion_r3213144630
##########
cpp/src/file/tsfile_io_reader.cc:
##########
@@ -155,6 +155,70 @@ int
TsFileIOReader::get_device_timeseries_meta_without_chunk_meta(
return ret;
}
+int TsFileIOReader::get_device_timeseries_meta_by_offset(
+ int64_t start_offset, int64_t end_offset,
+ std::vector<ITimeseriesIndex*>& timeseries_indexs, PageArena& pa) {
+ int ret = E_OK;
+ load_tsfile_meta_if_necessary();
+
+ std::vector<std::pair<std::shared_ptr<IMetaIndexEntry>, int64_t>>
+ meta_index_entry_list;
+ bool is_aligned = false;
+ TimeseriesIndex* time_timeseries_index = nullptr;
+
+ ASSERT(start_offset < end_offset);
+ const int32_t read_size = end_offset - start_offset;
+ int32_t ret_read_len = 0;
+ char* data_buf = (char*)pa.alloc(read_size);
+ void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));
Review Comment:
`get_device_timeseries_meta_by_offset()` relies on `ASSERT(start_offset <
end_offset)` and then narrows `end_offset - start_offset` into an `int32_t`. In
release builds the assert may be compiled out, and a large/invalid range can
overflow the 32-bit size, leading to undersized allocations and out-of-bounds
reads. Please add a runtime validation (including bounds for int32) and return
an error when the offsets are invalid.
##########
cpp/src/file/tsfile_io_reader.cc:
##########
@@ -155,6 +155,70 @@ int
TsFileIOReader::get_device_timeseries_meta_without_chunk_meta(
return ret;
}
+int TsFileIOReader::get_device_timeseries_meta_by_offset(
+ int64_t start_offset, int64_t end_offset,
+ std::vector<ITimeseriesIndex*>& timeseries_indexs, PageArena& pa) {
+ int ret = E_OK;
+ load_tsfile_meta_if_necessary();
+
+ std::vector<std::pair<std::shared_ptr<IMetaIndexEntry>, int64_t>>
+ meta_index_entry_list;
+ bool is_aligned = false;
+ TimeseriesIndex* time_timeseries_index = nullptr;
+
+ ASSERT(start_offset < end_offset);
+ const int32_t read_size = end_offset - start_offset;
+ int32_t ret_read_len = 0;
+ char* data_buf = (char*)pa.alloc(read_size);
+ void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));
+ if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) {
+ return E_OOM;
+ }
+ auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa);
+ auto top_node = std::shared_ptr<MetaIndexNode>(top_node_ptr,
+
MetaIndexNode::self_deleter);
+ if (RET_FAIL(read_file_->read(start_offset, data_buf, read_size,
+ ret_read_len))) {
+ return ret;
+ }
Review Comment:
After `read_file_->read(...)`, the code does not verify `ret_read_len ==
read_size` before deserializing. Since `ReadFile::read()` may return E_OK with
a short read, this can cause `MetaIndexNode::deserialize_from()` to read
incomplete/garbage data. Please check `ret_read_len` and return a
corruption/read error on partial reads.
##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -25,6 +25,71 @@
using namespace common;
using namespace storage;
+namespace {
+
+struct DeviceMetaEntry {
+ std::shared_ptr<IDeviceID> device_id;
+ int64_t start_offset;
+ int64_t end_offset;
+};
+
+int get_all_device_entries(std::vector<DeviceMetaEntry>& entries,
+ std::shared_ptr<MetaIndexNode> index_node,
+ ReadFile* read_file, PageArena& pa) {
Review Comment:
PR description mentions adding `TsFileReader::get_all_device_entries()`, but
the implementation is currently a free function in an anonymous namespace. If
this is intended as part of the TsFileReader API, it should be added as a class
method (and declared in the header); otherwise consider updating the PR
description to match the actual change.
##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -25,6 +25,71 @@
using namespace common;
using namespace storage;
+namespace {
+
+struct DeviceMetaEntry {
+ std::shared_ptr<IDeviceID> device_id;
+ int64_t start_offset;
+ int64_t end_offset;
+};
+
+int get_all_device_entries(std::vector<DeviceMetaEntry>& entries,
+ std::shared_ptr<MetaIndexNode> index_node,
+ ReadFile* read_file, PageArena& pa) {
+ int ret = E_OK;
+ if (index_node == nullptr) {
+ return ret;
+ }
+ if (index_node->node_type_ == LEAF_DEVICE) {
+ for (size_t i = 0; i < index_node->children_.size(); i++) {
+ DeviceMetaEntry entry;
+ entry.device_id = index_node->children_[i]->get_device_id();
+ entry.start_offset = index_node->children_[i]->get_offset();
+ entry.end_offset = (i + 1 < index_node->children_.size())
+ ? index_node->children_[i + 1]->get_offset()
+ : index_node->end_offset_;
+ entries.push_back(entry);
+ }
+ } else {
+ for (size_t idx = 0; idx < index_node->children_.size(); idx++) {
+ auto meta_index_entry = index_node->children_[idx];
+ int64_t start_offset = meta_index_entry->get_offset();
+ int64_t end_offset = index_node->end_offset_;
+ if (idx + 1 < index_node->children_.size()) {
+ end_offset = index_node->children_[idx + 1]->get_offset();
+ }
+ ASSERT(end_offset - start_offset > 0);
+ const int32_t read_size = (int32_t)(end_offset - start_offset);
+ int32_t ret_read_len = 0;
+ char* data_buf = (char*)pa.alloc(read_size);
+ void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));
+ if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) {
Review Comment:
In `get_all_device_entries()`, the code narrows `(end_offset -
start_offset)` into an `int32_t` and does not validate that the range is
positive or fits, relying on `ASSERT`. In release builds, invalid/corrupt
offsets can lead to incorrect allocation sizes and undefined behavior. Please
add runtime checks for the offset range and return an error when invalid.
##########
cpp/src/file/tsfile_io_reader.cc:
##########
@@ -155,6 +155,70 @@ int
TsFileIOReader::get_device_timeseries_meta_without_chunk_meta(
return ret;
}
+int TsFileIOReader::get_device_timeseries_meta_by_offset(
+ int64_t start_offset, int64_t end_offset,
+ std::vector<ITimeseriesIndex*>& timeseries_indexs, PageArena& pa) {
+ int ret = E_OK;
+ load_tsfile_meta_if_necessary();
+
+ std::vector<std::pair<std::shared_ptr<IMetaIndexEntry>, int64_t>>
+ meta_index_entry_list;
+ bool is_aligned = false;
+ TimeseriesIndex* time_timeseries_index = nullptr;
+
+ ASSERT(start_offset < end_offset);
+ const int32_t read_size = end_offset - start_offset;
+ int32_t ret_read_len = 0;
+ char* data_buf = (char*)pa.alloc(read_size);
+ void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));
+ if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) {
+ return E_OOM;
+ }
+ auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa);
+ auto top_node = std::shared_ptr<MetaIndexNode>(top_node_ptr,
+
MetaIndexNode::self_deleter);
+ if (RET_FAIL(read_file_->read(start_offset, data_buf, read_size,
+ ret_read_len))) {
+ return ret;
+ }
+ if (RET_FAIL(top_node->deserialize_from(data_buf, read_size))) {
+ return ret;
+ }
+
+ is_aligned = is_aligned_device(top_node);
+ if (is_aligned) {
+ if (RET_FAIL(get_time_column_metadata(top_node, time_timeseries_index,
+ pa))) {
+ return ret;
+ }
+ }
+
+ get_all_leaf(top_node, meta_index_entry_list);
+
+ if (RET_FAIL(do_load_all_timeseries_index(meta_index_entry_list, pa,
+ timeseries_indexs))) {
+ return ret;
Review Comment:
`get_all_leaf(top_node, meta_index_entry_list)` returns an error code, but
it’s currently ignored. If the meta index traversal fails (e.g., due to
partial/corrupted reads), this function will still proceed and may return
incomplete metadata without surfacing the error. Capture and propagate the
return code from `get_all_leaf`.
##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -25,6 +25,71 @@
using namespace common;
using namespace storage;
+namespace {
+
+struct DeviceMetaEntry {
+ std::shared_ptr<IDeviceID> device_id;
+ int64_t start_offset;
+ int64_t end_offset;
+};
+
+int get_all_device_entries(std::vector<DeviceMetaEntry>& entries,
+ std::shared_ptr<MetaIndexNode> index_node,
+ ReadFile* read_file, PageArena& pa) {
+ int ret = E_OK;
+ if (index_node == nullptr) {
+ return ret;
+ }
+ if (index_node->node_type_ == LEAF_DEVICE) {
+ for (size_t i = 0; i < index_node->children_.size(); i++) {
+ DeviceMetaEntry entry;
+ entry.device_id = index_node->children_[i]->get_device_id();
+ entry.start_offset = index_node->children_[i]->get_offset();
+ entry.end_offset = (i + 1 < index_node->children_.size())
+ ? index_node->children_[i + 1]->get_offset()
+ : index_node->end_offset_;
+ entries.push_back(entry);
+ }
+ } else {
+ for (size_t idx = 0; idx < index_node->children_.size(); idx++) {
+ auto meta_index_entry = index_node->children_[idx];
+ int64_t start_offset = meta_index_entry->get_offset();
+ int64_t end_offset = index_node->end_offset_;
+ if (idx + 1 < index_node->children_.size()) {
+ end_offset = index_node->children_[idx + 1]->get_offset();
+ }
+ ASSERT(end_offset - start_offset > 0);
+ const int32_t read_size = (int32_t)(end_offset - start_offset);
+ int32_t ret_read_len = 0;
+ char* data_buf = (char*)pa.alloc(read_size);
+ void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));
+ if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) {
+ return E_OOM;
+ }
+ auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa);
+ auto top_node = std::shared_ptr<MetaIndexNode>(
+ top_node_ptr, [](MetaIndexNode* ptr) {
+ if (ptr) {
+ ptr->~MetaIndexNode();
+ }
+ });
+ if (RET_FAIL(read_file->read(start_offset, data_buf, read_size,
+ ret_read_len))) {
+ } else if (RET_FAIL(top_node->device_deserialize_from(data_buf,
+ read_size)))
{
+ } else {
+ ret = get_all_device_entries(entries, top_node, read_file, pa);
+ }
Review Comment:
`get_all_device_entries()` ignores `ret_read_len` after
`read_file->read(...)` and proceeds to deserialize even if fewer bytes were
read. Since `ReadFile::read()` can succeed with a short read, please verify
`ret_read_len == read_size` and return an appropriate error (e.g.,
corrupted/read error) on partial reads.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]