This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fa24ad1b896 [improve](log) Add segment file info when bitshuffle page
corruption detected (#60547)
fa24ad1b896 is described below
commit fa24ad1b89651484e6c3b7d3463e0247a5404e6d
Author: Luwei <[email protected]>
AuthorDate: Thu Feb 12 00:22:54 2026 +0800
[improve](log) Add segment file info when bitshuffle page corruption
detected (#60547)
When bitshuffle page header corruption is detected, the current error
message only shows the corrupted values without any context about which
segment file is affected. This makes debugging very difficult.
This PR adds:
1. File path, page offset, page size, and page index to the error log in
FileColumnIterator::_read_data_page()
2. Enhanced error message in parse_bit_shuffle_header() to include
expected_padding, compressed_size, size_of_element, and data_size
(size_of_element is now decoded before the padding check so that it can
be reported)
3. Similar logging (file path, page offset, and page size) in
IndexedColumnIterator::_read_data_page() for completeness
Example output after this change:
W20260130 16:40:19 column_reader.cpp:1517] failed to create ParsedPage,
file=/path/to/data/tablet_id/rowset_id.dat, page_offset=12345678,
page_size=65536, page_index=42, error=[INTERNAL_ERROR]num of element
information corrupted, _num_element_after_padding:1364882540,
_num_elements:1197762646, expected_padding:1197762648, ...
---
be/src/olap/rowset/segment_v2/bitshuffle_page.h | 8 +++++---
be/src/olap/rowset/segment_v2/column_reader.cpp | 11 ++++++++---
be/src/olap/rowset/segment_v2/indexed_column_reader.cpp | 5 +++++
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index f518ac655d7..5a9c14b735c 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -282,13 +282,15 @@ inline Status parse_bit_shuffle_header(const Slice& data,
size_t& num_elements,
num_elements = decode_fixed32_le((const uint8_t*)&data[0]);
compressed_size = decode_fixed32_le((const uint8_t*)&data[4]);
num_element_after_padding = decode_fixed32_le((const uint8_t*)&data[8]);
+ size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
if (num_element_after_padding != ALIGN_UP(num_elements, 8)) {
return Status::InternalError(
"num of element information corrupted,"
- " _num_element_after_padding:{}, _num_elements:{}",
- num_element_after_padding, num_elements);
+ " _num_element_after_padding:{}, _num_elements:{},
expected_padding:{},"
+ " compressed_size:{}, size_of_element:{}, data_size:{}",
+ num_element_after_padding, num_elements,
ALIGN_UP(num_elements, 8), compressed_size,
+ size_of_element, data.size);
}
- size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
switch (size_of_element) {
case 1:
case 2:
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 104cb45c866..4bff9a61761 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1976,9 +1976,14 @@ Status FileColumnIterator::_read_data_page(const
OrdinalPageIndexIterator& iter)
RETURN_IF_ERROR(
_reader->read_page(_opts, iter.page(), &handle, &page_body,
&footer, _compress_codec));
// parse data page
- RETURN_IF_ERROR(ParsedPage::create(std::move(handle), page_body,
footer.data_page_footer(),
- _reader->encoding_info(), iter.page(),
iter.page_index(),
- &_page));
+ auto st = ParsedPage::create(std::move(handle), page_body,
footer.data_page_footer(),
+ _reader->encoding_info(), iter.page(),
iter.page_index(), &_page);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to create ParsedPage, file=" <<
_opts.file_reader->path().native()
+ << ", page_offset=" << iter.page().offset << ",
page_size=" << iter.page().size
+ << ", page_index=" << iter.page_index() << ", error=" <<
st;
+ return st;
+ }
// dictionary page is read when the first data page that uses it is read,
// this is to optimize the memory usage: when there is no query on one
column, we could
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 60327d36194..62325f1dbe2 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -165,6 +165,11 @@ Status IndexedColumnIterator::_read_data_page(const
PagePointer& pp) {
opts.need_check_bitmap = false;
status = ParsedPage::create(std::move(handle), body,
footer.data_page_footer(),
_reader->encoding_info(), pp, 0, &_data_page,
opts);
+ if (!status.ok()) {
+ LOG(WARNING) << "failed to create ParsedPage in IndexedColumnIterator,
file="
+ << _reader->_file_reader->path().native() << ",
page_offset=" << pp.offset
+ << ", page_size=" << pp.size << ", error=" << status;
+ }
DCHECK(_reader->_meta.ordinal_index_meta().is_root_data_page()
? _reader->_meta.num_values() == _data_page.num_rows
: true);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]