This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 56b3ffef27a [enhancement](statistics) Add segment footer open stats to
profile (#50326)
56b3ffef27a is described below
commit 56b3ffef27af3acc2335bffd28ed691c5952bbab
Author: Siyang Tang <[email protected]>
AuthorDate: Fri Apr 25 09:49:48 2025 +0800
[enhancement](statistics) Add segment footer open stats to profile (#50326)
---
be/src/olap/rowset/beta_rowset.cpp | 7 +++---
be/src/olap/rowset/beta_rowset.h | 4 ++-
.../segment_v2/lazy_init_segment_iterator.cpp | 9 +++----
be/src/olap/rowset/segment_v2/segment.cpp | 29 ++++++++++++----------
be/src/olap/rowset/segment_v2/segment.h | 12 +++++----
be/src/olap/segment_loader.cpp | 2 +-
be/test/olap/date_bloom_filter_test.cpp | 8 +++---
be/test/olap/segment_footer_cache_test.cpp | 8 +++---
8 files changed, 43 insertions(+), 36 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index b25b261c2bb..850a3480708 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -171,14 +171,15 @@ Status BetaRowset::load_segments(int64_t seg_id_begin,
int64_t seg_id_end,
int64_t seg_id = seg_id_begin;
while (seg_id < seg_id_end) {
std::shared_ptr<segment_v2::Segment> segment;
- RETURN_IF_ERROR(load_segment(seg_id, &segment));
+ RETURN_IF_ERROR(load_segment(seg_id, nullptr, &segment));
segments->push_back(std::move(segment));
seg_id++;
}
return Status::OK();
}
-Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr*
segment) {
+Status BetaRowset::load_segment(int64_t seg_id, OlapReaderStatistics* stats,
+ segment_v2::SegmentSharedPtr* segment) {
auto fs = _rowset_meta->fs();
if (!fs) {
return Status::Error<INIT_FAILED>("get fs failed");
@@ -196,7 +197,7 @@ Status BetaRowset::load_segment(int64_t seg_id,
segment_v2::SegmentSharedPtr* se
auto s = segment_v2::Segment::open(fs, seg_path,
_rowset_meta->tablet_id(), seg_id, rowset_id(),
_schema, reader_options, segment,
-
_rowset_meta->inverted_index_file_info(seg_id));
+
_rowset_meta->inverted_index_file_info(seg_id), stats);
if (!s.ok()) {
LOG(WARNING) << "failed to open segment. " << seg_path << " under
rowset " << rowset_id()
<< " : " << s.to_string();
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 4b1388a1f08..f3a8230383d 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -26,6 +26,7 @@
#include <vector>
#include "common/status.h"
+#include "olap/olap_common.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_meta.h"
#include "olap/rowset/rowset_reader.h"
@@ -76,7 +77,8 @@ public:
Status load_segments(int64_t seg_id_begin, int64_t seg_id_end,
std::vector<segment_v2::SegmentSharedPtr>* segments);
- Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
+ Status load_segment(int64_t seg_id, OlapReaderStatistics* read_stats,
+ segment_v2::SegmentSharedPtr* segment);
Status get_segments_size(std::vector<size_t>* segments_size);
diff --git a/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
index 77e2310fc48..af663b36a39 100644
--- a/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.cpp
@@ -30,9 +30,8 @@
LazyInitSegmentIterator::LazyInitSegmentIterator(BetaRowsetSharedPtr rowset, int
_schema(std::move(schema)),
_read_options(opts) {}
-/// Here do not use the argument of `opts`,
-/// see where the iterator is created in
`BetaRowsetReader::get_segment_iterators`
-Status LazyInitSegmentIterator::init(const StorageReadOptions& /*opts*/) {
+/// See where the iterator is created in
`BetaRowsetReader::get_segment_iterators`
+Status LazyInitSegmentIterator::init(const StorageReadOptions& opts) {
_need_lazy_init = false;
if (_inner_iterator) {
return Status::OK();
@@ -42,7 +41,7 @@ Status LazyInitSegmentIterator::init(const
StorageReadOptions& /*opts*/) {
{
SegmentCacheHandle segment_cache_handle;
RETURN_IF_ERROR(SegmentLoader::instance()->load_segment(
- _rowset, _segment_id, &segment_cache_handle,
_should_use_cache, false));
+ _rowset, _segment_id, &segment_cache_handle,
_should_use_cache, false, opts.stats));
const auto& tmp_segments = segment_cache_handle.get_segments();
segment = tmp_segments[0];
}
@@ -50,4 +49,4 @@ Status LazyInitSegmentIterator::init(const
StorageReadOptions& /*opts*/) {
return _inner_iterator->init(_read_options);
}
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 90135b5826d..6c283de3285 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -81,9 +81,9 @@ class InvertedIndexIterator;
Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t
tablet_id,
uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr
tablet_schema,
const io::FileReaderOptions& reader_options,
std::shared_ptr<Segment>* output,
- InvertedIndexFileInfo idx_file_info) {
+ InvertedIndexFileInfo idx_file_info,
OlapReaderStatistics* stats) {
auto s = _open(fs, path, segment_id, rowset_id, tablet_schema,
reader_options, output,
- idx_file_info);
+ idx_file_info, stats);
if (!s.ok()) {
if (!config::is_cloud_mode()) {
auto res = ExecEnv::get_tablet(tablet_id);
@@ -101,14 +101,14 @@ Status Segment::open(io::FileSystemSPtr fs, const
std::string& path, int64_t tab
Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t
segment_id,
RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
std::shared_ptr<Segment>* output,
- InvertedIndexFileInfo idx_file_info) {
+ InvertedIndexFileInfo idx_file_info,
OlapReaderStatistics* stats) {
io::FileReaderSPtr file_reader;
RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
std::shared_ptr<Segment> segment(
new Segment(segment_id, rowset_id, std::move(tablet_schema),
idx_file_info));
segment->_fs = fs;
segment->_file_reader = std::move(file_reader);
- auto st = segment->_open();
+ auto st = segment->_open(stats);
TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption", &st);
if (st.is<ErrorCode::CORRUPTION>() &&
reader_options.cache_type == io::FileCachePolicy::FILE_BLOCK_CACHE) {
@@ -121,7 +121,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const
std::string& path, uint32_t s
RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
segment->_file_reader = std::move(file_reader);
- st = segment->_open();
+ st = segment->_open(stats);
TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption1", &st);
if (st.is<ErrorCode::CORRUPTION>()) { // corrupt again
LOG(WARNING) << "failed to try to read remote source file again
with cache support,"
@@ -134,7 +134,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const
std::string& path, uint32_t s
opt.cache_type = io::FileCachePolicy::NO_CACHE; // skip cache
RETURN_IF_ERROR(fs->open_file(path, &file_reader, &opt));
segment->_file_reader = std::move(file_reader);
- st = segment->_open();
+ st = segment->_open(stats);
if (!st.ok()) {
LOG(WARNING) << "failed to try to read remote source file
directly,"
<< " file path: " << path
@@ -177,9 +177,9 @@ void Segment::update_metadata_size() {
_tracked_meta_mem_usage = _meta_mem_usage;
}
-Status Segment::_open() {
+Status Segment::_open(OlapReaderStatistics* stats) {
std::shared_ptr<SegmentFooterPB> footer_pb_shared;
- RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared));
+ RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared, stats));
_pk_index_meta.reset(
footer_pb_shared->has_primary_key_index_meta()
@@ -390,7 +390,8 @@ Status Segment::_write_error_file(size_t file_size, size_t
offset, size_t bytes_
return Status::OK(); // already exists
};
-Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer) {
+Status Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer,
+ OlapReaderStatistics* stats) {
// Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4),
MagicNumber(4)
auto file_size = _file_reader->size();
if (file_size < 12) {
@@ -402,7 +403,8 @@ Status
Segment::_parse_footer(std::shared_ptr<SegmentFooterPB>& footer) {
uint8_t fixed_buf[12];
size_t bytes_read = 0;
// TODO(plat1ko): Support session variable `enable_file_cache`
- io::IOContext io_ctx {.is_index_data = true};
+ io::IOContext io_ctx {.is_index_data = true,
+ .file_cache_stats = stats ? &stats->file_cache_stats
: nullptr};
RETURN_IF_ERROR(
_file_reader->read_at(file_size - 12, Slice(fixed_buf, 12),
&bytes_read, &io_ctx));
DCHECK_EQ(bytes_read, 12);
@@ -606,7 +608,7 @@ Status
Segment::_create_column_readers_once(OlapReaderStatistics* stats) {
SCOPED_RAW_TIMER(&stats->segment_create_column_readers_timer_ns);
return _create_column_readers_once_call.call([&] {
std::shared_ptr<SegmentFooterPB> footer_pb_shared;
- RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared));
+ RETURN_IF_ERROR(_get_segment_footer(footer_pb_shared, stats));
return _create_column_readers(*footer_pb_shared);
});
}
@@ -1185,7 +1187,8 @@ Status Segment::seek_and_read_by_rowid(const
TabletSchema& schema, SlotDescripto
return Status::OK();
}
-Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>&
footer_pb) {
+Status Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>&
footer_pb,
+ OlapReaderStatistics* stats) {
std::shared_ptr<SegmentFooterPB> footer_pb_shared = _footer_pb.lock();
if (footer_pb_shared != nullptr) {
footer_pb = footer_pb_shared;
@@ -1205,7 +1208,7 @@ Status
Segment::_get_segment_footer(std::shared_ptr<SegmentFooterPB>& footer_pb)
if (!segment_footer_cache->lookup(cache_key, &cache_handle,
segment_v2::PageTypePB::DATA_PAGE)) {
- RETURN_IF_ERROR(_parse_footer(footer_pb_shared));
+ RETURN_IF_ERROR(_parse_footer(footer_pb_shared, stats));
segment_footer_cache->insert(cache_key, footer_pb_shared,
footer_pb_shared->ByteSizeLong(),
&cache_handle,
segment_v2::PageTypePB::DATA_PAGE);
} else {
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 382db37976c..4b08b9c0d00 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -83,7 +83,8 @@ public:
static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t
tablet_id,
uint32_t segment_id, RowsetId rowset_id,
TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
- std::shared_ptr<Segment>* output, InvertedIndexFileInfo
idx_file_info = {});
+ std::shared_ptr<Segment>* output, InvertedIndexFileInfo
idx_file_info = {},
+ OlapReaderStatistics* stats = nullptr);
static io::UInt128Wrapper file_cache_key(std::string_view rowset_id,
uint32_t seg_id);
io::UInt128Wrapper file_cache_key() const {
@@ -225,10 +226,11 @@ private:
static Status _open(io::FileSystemSPtr fs, const std::string& path,
uint32_t segment_id,
RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
- std::shared_ptr<Segment>* output,
InvertedIndexFileInfo idx_file_info);
+ std::shared_ptr<Segment>* output,
InvertedIndexFileInfo idx_file_info,
+ OlapReaderStatistics* stats);
// open segment file and read the minimum amount of necessary information
(footer)
- Status _open();
- Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer);
+ Status _open(OlapReaderStatistics* stats);
+ Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer,
OlapReaderStatistics* stats);
Status _create_column_readers(const SegmentFooterPB& footer);
Status _load_pk_bloom_filter(OlapReaderStatistics* stats);
ColumnReader* _get_column_reader(const TabletColumn& col);
@@ -245,7 +247,7 @@ private:
Status _create_column_readers_once(OlapReaderStatistics* stats);
- Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&);
+ Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&,
OlapReaderStatistics* stats);
StoragePageCache::CacheKey get_segment_footer_cache_key() const;
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index 1bd21ad4e55..5385a2d5cf0 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -67,7 +67,7 @@ Status SegmentLoader::load_segment(const BetaRowsetSharedPtr&
rowset, int64_t se
}
// If the segment is not healthy, then will create a new segment and will
replace the unhealthy one in SegmentCache.
segment_v2::SegmentSharedPtr segment;
- RETURN_IF_ERROR(rowset->load_segment(segment_id, &segment));
+ RETURN_IF_ERROR(rowset->load_segment(segment_id, index_load_stats,
&segment));
if (need_load_pk_index_and_bf) {
RETURN_IF_ERROR(segment->load_pk_index_and_bf(index_load_stats));
}
diff --git a/be/test/olap/date_bloom_filter_test.cpp
b/be/test/olap/date_bloom_filter_test.cpp
index f0e8eb11579..e902e61cf21 100644
--- a/be/test/olap/date_bloom_filter_test.cpp
+++ b/be/test/olap/date_bloom_filter_test.cpp
@@ -150,9 +150,9 @@ TEST_F(DateBloomFilterTest, query_index_test) {
EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
segment_v2::SegmentSharedPtr segment;
- EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok());
+ EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, nullptr,
&segment).ok());
std::shared_ptr<SegmentFooterPB> footer_pb_shared;
- auto st = segment->_get_segment_footer(footer_pb_shared);
+ auto st = segment->_get_segment_footer(footer_pb_shared, nullptr);
EXPECT_TRUE(st.ok());
st = segment->_create_column_readers(*footer_pb_shared);
EXPECT_TRUE(st.ok());
@@ -230,9 +230,9 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) {
EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
segment_v2::SegmentSharedPtr segment;
- EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok());
+ EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, nullptr,
&segment).ok());
std::shared_ptr<SegmentFooterPB> footer_pb_shared;
- auto st = segment->_get_segment_footer(footer_pb_shared);
+ auto st = segment->_get_segment_footer(footer_pb_shared, nullptr);
EXPECT_TRUE(st.ok());
st = segment->_create_column_readers(*(footer_pb_shared));
EXPECT_TRUE(st.ok());
diff --git a/be/test/olap/segment_footer_cache_test.cpp
b/be/test/olap/segment_footer_cache_test.cpp
index f0ed4960151..25b809c6055 100644
--- a/be/test/olap/segment_footer_cache_test.cpp
+++ b/be/test/olap/segment_footer_cache_test.cpp
@@ -151,13 +151,13 @@ private:
TEST_F(SegmentFooterCacheTest, TestGetSegmentFooter) {
for (auto segment_ptr : _segments) {
std::shared_ptr<segment_v2::SegmentFooterPB> footer;
- Status st = segment_ptr->_get_segment_footer(footer);
+ Status st = segment_ptr->_get_segment_footer(footer, nullptr);
ASSERT_TRUE(st.ok());
}
for (auto segment_ptr : _segments) {
std::shared_ptr<segment_v2::SegmentFooterPB> footer;
- Status st = segment_ptr->_get_segment_footer(footer);
+ Status st = segment_ptr->_get_segment_footer(footer, nullptr);
ASSERT_TRUE(st.ok());
}
}
@@ -179,7 +179,7 @@ TEST_F(SegmentFooterCacheTest, TestSemgnetFooterPBPage) {
StoragePageCache cache(16 * 2048, 0, 0, 16);
for (auto segment_ptr : _segments) {
std::shared_ptr<segment_v2::SegmentFooterPB> footer;
- Status st = segment_ptr->_get_segment_footer(footer);
+ Status st = segment_ptr->_get_segment_footer(footer, nullptr);
ASSERT_TRUE(st.ok());
PageCacheHandle cache_handle;
cache.insert(segment_ptr->get_segment_footer_cache_key(), footer,
footer->ByteSizeLong(),
@@ -192,4 +192,4 @@ TEST_F(SegmentFooterCacheTest, TestSemgnetFooterPBPage) {
}
}
-} // namespace doris
\ No newline at end of file
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]