This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new fa3e01f1 fix(cpp): aligned VECTOR row-offset skip only when time/value
counts match (#778)
fa3e01f1 is described below
commit fa3e01f1895dd2d498a63c552f50bf27eb85b797
Author: Hongzhi Gao <[email protected]>
AuthorDate: Sun Apr 12 15:57:11 2026 +0800
fix(cpp): aligned VECTOR row-offset skip only when time/value counts match
(#778)
Whole-chunk and whole-page skips by statistic count previously trusted a single
side for aligned series (the value ChunkMeta for chunks; the time page header,
falling back to the value page header, for pages), which could desynchronize
row_offset from decoded rows when ChunkMeta or page header counts differed.
Require both time and value statistics to be present, positive, and equal
before applying a count-based skip; otherwise do not skip: decode the chunk or
page and rely on page-level and row-level offset handling.
Made-with: Cursor
---
cpp/src/reader/aligned_chunk_reader.cc | 18 ++++++++++++------
cpp/src/reader/tsfile_series_scan_iterator.cc | 27 ++++++++++++++++++++++++++-
cpp/src/reader/tsfile_series_scan_iterator.h | 8 ++++++++
3 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/cpp/src/reader/aligned_chunk_reader.cc
b/cpp/src/reader/aligned_chunk_reader.cc
index 955715d4..d79bc781 100644
--- a/cpp/src/reader/aligned_chunk_reader.cc
+++ b/cpp/src/reader/aligned_chunk_reader.cc
@@ -740,15 +740,21 @@ bool AlignedChunkReader::should_skip_page_by_offset(int&
row_offset) {
if (row_offset <= 0) {
return false;
}
- // Use time page statistic for count.
- Statistic* stat = cur_time_page_header_.statistic_;
- if (stat == nullptr) {
- stat = cur_value_page_header_.statistic_;
+ // Aligned TV pages: only skip a whole page by count when both page headers
+ // expose the same positive row count. Using a single side (or min) when
+ // the other is missing or unequal can desynchronize row_offset from
+ // decoded row order vs. the paired time/value stream.
+ Statistic* ts = cur_time_page_header_.statistic_;
+ Statistic* vs = cur_value_page_header_.statistic_;
+ if (ts == nullptr || vs == nullptr) {
+ return false;
}
- if (stat == nullptr || stat->count_ == 0) {
+ int32_t tc = ts->count_;
+ int32_t vc = vs->count_;
+ if (tc <= 0 || vc <= 0 || tc != vc) {
return false;
}
- int32_t count = stat->count_;
+ int32_t count = tc;
if (row_offset >= count) {
row_offset -= count;
return true;
diff --git a/cpp/src/reader/tsfile_series_scan_iterator.cc
b/cpp/src/reader/tsfile_series_scan_iterator.cc
index c363d0a4..5add1e9a 100644
--- a/cpp/src/reader/tsfile_series_scan_iterator.cc
+++ b/cpp/src/reader/tsfile_series_scan_iterator.cc
@@ -60,6 +60,30 @@ bool
TsFileSeriesScanIterator::should_skip_chunk_by_offset(ChunkMeta* cm) {
return false;
}
+bool TsFileSeriesScanIterator::should_skip_aligned_chunk_by_offset(
+ ChunkMeta* time_cm, ChunkMeta* value_cm) {
+ if (row_offset_ <= 0) {
+ return false;
+ }
+ if (time_cm->statistic_ == nullptr || value_cm->statistic_ == nullptr) {
+ return false;
+ }
+ int32_t tc = time_cm->statistic_->count_;
+ int32_t vc = value_cm->statistic_->count_;
+ if (tc <= 0 || vc <= 0) {
+ return false;
+ }
+ if (tc != vc) {
+ return false;
+ }
+ int32_t count = tc;
+ if (row_offset_ >= count) {
+ row_offset_ -= count;
+ return true;
+ }
+ return false;
+}
+
int TsFileSeriesScanIterator::get_next(TsBlock*& ret_tsblock, bool alloc,
Filter* oneshoot_filter,
int64_t min_time_hint) {
@@ -106,7 +130,8 @@ int TsFileSeriesScanIterator::get_next(TsBlock*&
ret_tsblock, bool alloc,
min_time_hint)) {
continue;
}
- if (should_skip_chunk_by_offset(value_cm)) {
+ if (should_skip_aligned_chunk_by_offset(time_cm,
+ value_cm)) {
continue;
}
chunk_reader_->reset();
diff --git a/cpp/src/reader/tsfile_series_scan_iterator.h
b/cpp/src/reader/tsfile_series_scan_iterator.h
index 06b35ba1..9e790a3d 100644
--- a/cpp/src/reader/tsfile_series_scan_iterator.h
+++ b/cpp/src/reader/tsfile_series_scan_iterator.h
@@ -119,6 +119,14 @@ class TsFileSeriesScanIterator {
}
bool should_skip_chunk_by_time(ChunkMeta* cm, int64_t min_time_hint);
bool should_skip_chunk_by_offset(ChunkMeta* cm);
+ /**
+ * Aligned (VECTOR): whole-chunk skip by row count is only safe when the
+ * time ChunkMeta and value ChunkMeta agree on statistic count (>0). If
+ * either side lacks count or counts differ, skip is disabled for this
+ * chunk; pages are loaded and page/row-level offset handling applies.
+ */
+ bool should_skip_aligned_chunk_by_offset(ChunkMeta* time_cm,
+ ChunkMeta* value_cm);
common::TsBlock* alloc_tsblock();
private: