Copilot commented on code in PR #815:
URL: https://github.com/apache/tsfile/pull/815#discussion_r3213126524


##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -397,13 +458,33 @@ DeviceTimeseriesMetadataMap 
TsFileReader::get_timeseries_metadata(
 }
 
 DeviceTimeseriesMetadataMap TsFileReader::get_timeseries_metadata() {
-    // Collect metadata for all devices present in the file
     DeviceTimeseriesMetadataMap result;
-    auto device_ids = get_all_device_ids();
-    for (const auto& device_id : device_ids) {
-        std::vector<std::shared_ptr<ITimeseriesIndex>> list;
-        if (get_timeseries_metadata_impl(device_id, list) == E_OK) {
-            result.insert(std::make_pair(device_id, std::move(list)));
+    TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta();
+    if (tsfile_meta == nullptr) {
+        return result;
+    }
+
+    PageArena pa;
+    pa.init(512, MOD_TSFILE_READER);
+    std::vector<DeviceMetaEntry> entries;
+    for (auto& table_entry : tsfile_meta->table_metadata_index_node_map_) {
+        get_all_device_entries(entries, table_entry.second, read_file_, pa);

Review Comment:
   The return value of get_all_device_entries() is ignored here. If traversal 
fails (e.g., I/O error / OOM), get_timeseries_metadata() will proceed with an 
incomplete `entries` list and silently return partial metadata. Capture and 
handle the error (e.g., early-return or stop iterating tables when RET_FAIL).
   



##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -25,6 +25,69 @@
 using namespace common;
 using namespace storage;
 
+namespace {
+
+struct DeviceMetaEntry {
+    std::shared_ptr<IDeviceID> device_id;
+    int64_t start_offset;
+    int64_t end_offset;
+};
+
+int get_all_device_entries(std::vector<DeviceMetaEntry>& entries,
+                           std::shared_ptr<MetaIndexNode> index_node,
+                           ReadFile* read_file, PageArena& pa) {
+    int ret = E_OK;
+    if (index_node == nullptr) {
+        return ret;
+    }
+    if (index_node->node_type_ == LEAF_DEVICE) {
+        for (size_t i = 0; i < index_node->children_.size(); i++) {
+            DeviceMetaEntry entry;
+            entry.device_id = index_node->children_[i]->get_device_id();
+            entry.start_offset = index_node->children_[i]->get_offset();
+            entry.end_offset =
+                (i + 1 < index_node->children_.size())
+                    ? index_node->children_[i + 1]->get_offset()
+                    : index_node->end_offset_;
+            entries.push_back(entry);
+        }
+    } else {
+        for (size_t idx = 0; idx < index_node->children_.size(); idx++) {
+            auto meta_index_entry = index_node->children_[idx];
+            int start_offset = meta_index_entry->get_offset();
+            int end_offset = index_node->end_offset_;
+            if (idx + 1 < index_node->children_.size()) {
+                end_offset = index_node->children_[idx + 1]->get_offset();
+            }
+            ASSERT(end_offset - start_offset > 0);
+            const int32_t read_size = (int32_t)end_offset - start_offset;
+            int32_t ret_read_len = 0;
+            char* data_buf = (char*)pa.alloc(read_size);
+            void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));

Review Comment:
   `read_size` is computed as `(int32_t)end_offset - start_offset`, which 
truncates `end_offset` before subtraction. If offsets exceed INT32_MAX this can 
produce a bogus span and unsafe alloc/read. Compute the span in int64_t first, 
validate bounds (return E_OVERFLOW if needed), then cast to int32_t.



##########
cpp/src/file/tsfile_io_reader.cc:
##########
@@ -155,6 +155,70 @@ int 
TsFileIOReader::get_device_timeseries_meta_without_chunk_meta(
     return ret;
 }
 
+int TsFileIOReader::get_device_timeseries_meta_by_offset(
+    int64_t start_offset, int64_t end_offset,
+    std::vector<ITimeseriesIndex*>& timeseries_indexs, PageArena& pa) {
+    int ret = E_OK;
+    load_tsfile_meta_if_necessary();
+
+    std::vector<std::pair<std::shared_ptr<IMetaIndexEntry>, int64_t>>
+        meta_index_entry_list;
+    bool is_aligned = false;
+    TimeseriesIndex* time_timeseries_index = nullptr;
+
+    ASSERT(start_offset < end_offset);
+    const int32_t read_size = end_offset - start_offset;
+    int32_t ret_read_len = 0;
+    char* data_buf = (char*)pa.alloc(read_size);
+    void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));

Review Comment:
   get_device_timeseries_meta_by_offset() narrows `end_offset - start_offset` 
into an int32_t. If the span exceeds INT32_MAX, `read_size` will overflow and 
the subsequent alloc/read becomes unsafe. Add a bounds check (return E_OVERFLOW 
/ E_INVALID_ARG) before casting.



##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -25,6 +25,69 @@
 using namespace common;
 using namespace storage;
 
+namespace {
+
+struct DeviceMetaEntry {
+    std::shared_ptr<IDeviceID> device_id;
+    int64_t start_offset;
+    int64_t end_offset;
+};
+
+int get_all_device_entries(std::vector<DeviceMetaEntry>& entries,
+                           std::shared_ptr<MetaIndexNode> index_node,
+                           ReadFile* read_file, PageArena& pa) {
+    int ret = E_OK;
+    if (index_node == nullptr) {
+        return ret;
+    }
+    if (index_node->node_type_ == LEAF_DEVICE) {
+        for (size_t i = 0; i < index_node->children_.size(); i++) {
+            DeviceMetaEntry entry;
+            entry.device_id = index_node->children_[i]->get_device_id();
+            entry.start_offset = index_node->children_[i]->get_offset();
+            entry.end_offset =
+                (i + 1 < index_node->children_.size())
+                    ? index_node->children_[i + 1]->get_offset()
+                    : index_node->end_offset_;
+            entries.push_back(entry);
+        }
+    } else {
+        for (size_t idx = 0; idx < index_node->children_.size(); idx++) {
+            auto meta_index_entry = index_node->children_[idx];
+            int start_offset = meta_index_entry->get_offset();
+            int end_offset = index_node->end_offset_;
+            if (idx + 1 < index_node->children_.size()) {
+                end_offset = index_node->children_[idx + 1]->get_offset();
+            }
+            ASSERT(end_offset - start_offset > 0);
+            const int32_t read_size = (int32_t)end_offset - start_offset;

Review Comment:
   In get_all_device_entries() the internal-node offsets are stored in `int` 
even though MetaIndexEntry offsets are int64 file positions. This can truncate 
offsets for large tsfiles and cause incorrect reads. Use int64_t for start/end 
offsets throughout the traversal.
   



##########
cpp/src/reader/tsfile_reader.cc:
##########
@@ -25,6 +25,69 @@
 using namespace common;
 using namespace storage;
 
+namespace {
+
+struct DeviceMetaEntry {
+    std::shared_ptr<IDeviceID> device_id;
+    int64_t start_offset;
+    int64_t end_offset;
+};
+
+int get_all_device_entries(std::vector<DeviceMetaEntry>& entries,
+                           std::shared_ptr<MetaIndexNode> index_node,
+                           ReadFile* read_file, PageArena& pa) {
+    int ret = E_OK;
+    if (index_node == nullptr) {
+        return ret;
+    }
+    if (index_node->node_type_ == LEAF_DEVICE) {
+        for (size_t i = 0; i < index_node->children_.size(); i++) {
+            DeviceMetaEntry entry;
+            entry.device_id = index_node->children_[i]->get_device_id();
+            entry.start_offset = index_node->children_[i]->get_offset();
+            entry.end_offset =
+                (i + 1 < index_node->children_.size())
+                    ? index_node->children_[i + 1]->get_offset()
+                    : index_node->end_offset_;
+            entries.push_back(entry);
+        }
+    } else {
+        for (size_t idx = 0; idx < index_node->children_.size(); idx++) {
+            auto meta_index_entry = index_node->children_[idx];
+            int start_offset = meta_index_entry->get_offset();
+            int end_offset = index_node->end_offset_;
+            if (idx + 1 < index_node->children_.size()) {
+                end_offset = index_node->children_[idx + 1]->get_offset();
+            }
+            ASSERT(end_offset - start_offset > 0);
+            const int32_t read_size = (int32_t)end_offset - start_offset;
+            int32_t ret_read_len = 0;
+            char* data_buf = (char*)pa.alloc(read_size);
+            void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode));
+            if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) {
+                return E_OOM;
+            }
+            auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa);
+            auto top_node = std::shared_ptr<MetaIndexNode>(
+                top_node_ptr, [](MetaIndexNode* ptr) {
+                    if (ptr) {
+                        ptr->~MetaIndexNode();
+                    }
+                });
+            if (RET_FAIL(read_file->read(start_offset, data_buf, read_size,
+                                         ret_read_len))) {
+            } else if (RET_FAIL(top_node->device_deserialize_from(
+                           data_buf, read_size))) {
+            } else {
+                ret = get_all_device_entries(entries, top_node, read_file, pa);

Review Comment:
   On read/deserialize failure, this loop continues and a later success can 
overwrite `ret` back to E_OK, so callers may get partial results without an 
error. Consider returning/breaking immediately on RET_FAIL(...) (or at least 
preserving the first error).
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to