This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4efb1689f77c5a81565db821b0e7b63058a7d412 Author: Yongqiang YANG <[email protected]> AuthorDate: Tue Aug 29 09:34:27 2023 +0800 [improvement](column_reader) move load once to index reader to reduce (#23537) memory footprint of column reader --- .../olap/rowset/segment_v2/bitmap_index_reader.cpp | 17 ++++++++++++---- .../olap/rowset/segment_v2/bitmap_index_reader.h | 14 +++++++------ .../segment_v2/bloom_filter_index_reader.cpp | 7 +++++++ .../rowset/segment_v2/bloom_filter_index_reader.h | 5 +++++ be/src/olap/rowset/segment_v2/column_reader.cpp | 23 +++++++--------------- be/src/olap/rowset/segment_v2/column_reader.h | 6 ------ .../olap/rowset/segment_v2/ordinal_page_index.cpp | 17 ++++++++++++---- be/src/olap/rowset/segment_v2/ordinal_page_index.h | 16 ++++++++------- be/src/olap/rowset/segment_v2/zone_map_index.cpp | 13 ++++++++++-- be/src/olap/rowset/segment_v2/zone_map_index.h | 14 ++++++++----- .../olap/rowset/segment_v2/bitmap_index_test.cpp | 4 ++-- .../rowset/segment_v2/ordinal_page_index_test.cpp | 8 ++++---- .../olap/rowset/segment_v2/zone_map_index_test.cpp | 12 +++++------ 13 files changed, 94 insertions(+), 62 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp index 0e2c26ff30..69dbf05356 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp @@ -32,10 +32,19 @@ namespace doris { namespace segment_v2 { -Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory) { - const IndexedColumnMetaPB& dict_meta = _bitmap_index_meta->dict_column(); - const IndexedColumnMetaPB& bitmap_meta = _bitmap_index_meta->bitmap_column(); - _has_null = _bitmap_index_meta->has_null(); +Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory, + const BitmapIndexPB* index_meta) { + // TODO yyq: implement a new once flag to avoid status construct. 
+ return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] { + return _load(use_page_cache, kept_in_memory, index_meta); + }); +} + +Status BitmapIndexReader::_load(bool use_page_cache, bool kept_in_memory, + const BitmapIndexPB* index_meta) { + const IndexedColumnMetaPB& dict_meta = index_meta->dict_column(); + const IndexedColumnMetaPB& bitmap_meta = index_meta->bitmap_column(); + _has_null = index_meta->has_null(); _dict_column_reader.reset(new IndexedColumnReader(_file_reader, dict_meta)); _bitmap_column_reader.reset(new IndexedColumnReader(_file_reader, bitmap_meta)); diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h index c448c9f5cc..1ac1643989 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h @@ -28,6 +28,7 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/indexed_column_reader.h" #include "olap/types.h" +#include "util/once.h" namespace roaring { class Roaring; @@ -42,13 +43,11 @@ class BitmapIndexPB; class BitmapIndexReader { public: - explicit BitmapIndexReader(io::FileReaderSPtr file_reader, - const BitmapIndexPB* bitmap_index_meta) + explicit BitmapIndexReader(io::FileReaderSPtr file_reader) : _file_reader(std::move(file_reader)), - _type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()), - _bitmap_index_meta(bitmap_index_meta) {} + _type_info(get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>()) {} - Status load(bool use_page_cache, bool kept_in_memory); + Status load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*); // create a new column iterator. 
Client should delete returned iterator Status new_iterator(BitmapIndexIterator** iterator); @@ -57,13 +56,16 @@ public: const TypeInfo* type_info() { return _type_info; } +private: + Status _load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*); + private: friend class BitmapIndexIterator; io::FileReaderSPtr _file_reader; const TypeInfo* _type_info; - const BitmapIndexPB* _bitmap_index_meta; bool _has_null = false; + DorisCallOnce<Status> _load_once; std::unique_ptr<IndexedColumnReader> _dict_column_reader; std::unique_ptr<IndexedColumnReader> _bitmap_column_reader; }; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp index e3ae352dd7..dd663b0175 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp @@ -31,6 +31,13 @@ namespace doris { namespace segment_v2 { Status BloomFilterIndexReader::load(bool use_page_cache, bool kept_in_memory) { + // TODO yyq: implement a new once flag to avoid status construct. 
+ return _load_once.call([this, use_page_cache, kept_in_memory] { + return _load(use_page_cache, kept_in_memory); + }); +} + +Status BloomFilterIndexReader::_load(bool use_page_cache, bool kept_in_memory) { const IndexedColumnMetaPB& bf_index_meta = _bloom_filter_index_meta->bloom_filter(); _bloom_filter_reader.reset(new IndexedColumnReader(_file_reader, bf_index_meta)); diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h index 452a7ea2ae..dc45e4f692 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h @@ -28,6 +28,7 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/indexed_column_reader.h" #include "olap/types.h" +#include "util/once.h" namespace doris { @@ -52,10 +53,14 @@ public: const TypeInfo* type_info() const { return _type_info; } +private: + Status _load(bool use_page_cache, bool kept_in_memory); + private: friend class BloomFilterIndexIterator; io::FileReaderSPtr _file_reader; + DorisCallOnce<Status> _load_once; const TypeInfo* _type_info; const BloomFilterIndexPB* _bloom_filter_index_meta; std::unique_ptr<IndexedColumnReader> _bloom_filter_reader; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 30d593ba80..ae823d76ba 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -199,16 +199,15 @@ Status ColumnReader::init() { switch (index_meta.type()) { case ORDINAL_INDEX: _ordinal_index_meta = &index_meta.ordinal_index(); - _ordinal_index.reset( - new OrdinalIndexReader(_file_reader, _ordinal_index_meta, _num_rows)); + _ordinal_index.reset(new OrdinalIndexReader(_file_reader, _num_rows)); break; case ZONE_MAP_INDEX: _zone_map_index_meta = &index_meta.zone_map_index(); - _zone_map_index.reset(new ZoneMapIndexReader(_file_reader, 
_zone_map_index_meta)); + _zone_map_index.reset(new ZoneMapIndexReader(_file_reader)); break; case BITMAP_INDEX: _bitmap_index_meta = &index_meta.bitmap_index(); - _bitmap_index.reset(new BitmapIndexReader(_file_reader, _bitmap_index_meta)); + _bitmap_index.reset(new BitmapIndexReader(_file_reader)); break; case BLOOM_FILTER_INDEX: _bf_index_meta = &index_meta.bloom_filter_index(); @@ -466,25 +465,19 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) { DCHECK(_ordinal_index_meta != nullptr); - return _load_ordinal_index_once.call([this, use_page_cache, kept_in_memory] { - return _ordinal_index->load(use_page_cache, kept_in_memory); - }); + return _ordinal_index->load(use_page_cache, kept_in_memory, _ordinal_index_meta); } Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) { if (_zone_map_index_meta != nullptr) { - return _load_zone_map_index_once.call([this, use_page_cache, kept_in_memory] { - return _zone_map_index->load(use_page_cache, kept_in_memory); - }); + return _zone_map_index->load(use_page_cache, kept_in_memory, _zone_map_index_meta); } return Status::OK(); } Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) { if (_bitmap_index_meta != nullptr) { - return _load_bitmap_index_once.call([this, use_page_cache, kept_in_memory] { - return _bitmap_index->load(use_page_cache, kept_in_memory); - }); + return _bitmap_index->load(use_page_cache, kept_in_memory, _bitmap_index_meta); } return Status::OK(); } @@ -527,9 +520,7 @@ Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) { Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) { if (_bf_index_meta != nullptr) { - return _load_bloom_filter_index_once.call([this, use_page_cache, kept_in_memory] { - return _bloom_filter_index->load(use_page_cache, kept_in_memory); - }); + 
return _bloom_filter_index->load(use_page_cache, kept_in_memory); } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index fb212ef33d..828841f23c 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -247,15 +247,9 @@ private: std::unique_ptr<BitmapIndexReader> _bitmap_index; std::shared_ptr<InvertedIndexReader> _inverted_index; std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index; - DorisCallOnce<Status> _load_zone_map_index_once; - DorisCallOnce<Status> _load_ordinal_index_once; - DorisCallOnce<Status> _load_bitmap_index_once; - DorisCallOnce<Status> _load_bloom_filter_index_once; - DorisCallOnce<Status> _load_inverted_index_once; std::vector<std::unique_ptr<ColumnReader>> _sub_readers; - std::once_flag _set_dict_encoding_type_flag; DorisCallOnce<Status> _set_dict_encoding_type_once; }; diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp index d977e29f11..d3d6d9cb9c 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -67,19 +67,28 @@ Status OrdinalIndexWriter::finish(io::FileWriter* file_writer, ColumnIndexMetaPB return Status::OK(); } -Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory) { - if (_index_meta->root_page().is_root_data_page()) { +Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory, + const OrdinalIndexPB* index_meta) { + // TODO yyq: implement a new once flag to avoid status construct. 
+ return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] { + return _load(use_page_cache, kept_in_memory, index_meta); + }); +} + +Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, + const OrdinalIndexPB* index_meta) { + if (index_meta->root_page().is_root_data_page()) { // only one data page, no index page _num_pages = 1; _ordinals.push_back(0); _ordinals.push_back(_num_values); - _pages.emplace_back(_index_meta->root_page().root_page()); + _pages.emplace_back(index_meta->root_page().root_page()); return Status::OK(); } // need to read index page PageReadOptions opts; opts.file_reader = _file_reader.get(); - opts.page_pointer = PagePointer(_index_meta->root_page().root_page()); + opts.page_pointer = PagePointer(index_meta->root_page().root_page()); opts.codec = nullptr; // ordinal index page uses NO_COMPRESSION right now OlapReaderStatistics tmp_stats; opts.stats = &tmp_stats; diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h b/be/src/olap/rowset/segment_v2/ordinal_page_index.h index 17dba9e675..84a25c78a7 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h @@ -30,6 +30,7 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/index_page.h" #include "olap/rowset/segment_v2/page_pointer.h" +#include "util/once.h" namespace doris { @@ -65,14 +66,11 @@ class OrdinalPageIndexIterator; class OrdinalIndexReader { public: - explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, const OrdinalIndexPB* index_meta, - ordinal_t num_values) - : _file_reader(std::move(file_reader)), - _index_meta(index_meta), - _num_values(num_values) {} + explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values) + : _file_reader(std::move(file_reader)), _num_values(num_values) {} // load and parse the index page into memory - Status load(bool use_page_cache, bool kept_in_memory); + Status load(bool use_page_cache, 
bool kept_in_memory, const OrdinalIndexPB* index_meta); // the returned iter points to the largest element which is less than `ordinal`, // or points to the first element if all elements are greater than `ordinal`, @@ -89,11 +87,15 @@ public: // for test int32_t num_data_pages() const { return _num_pages; } +private: + Status _load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta); + private: friend OrdinalPageIndexIterator; io::FileReaderSPtr _file_reader; - const OrdinalIndexPB* _index_meta; + DorisCallOnce<Status> _load_once; + // total number of values (including NULLs) in the indexed column, // equals to 1 + 'last ordinal of last data pages' ordinal_t _num_values; diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp index 9a4ec628c2..4306224c13 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp +++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp @@ -145,8 +145,17 @@ Status TypedZoneMapIndexWriter<Type>::finish(io::FileWriter* file_writer, return writer.finish(meta->mutable_page_zone_maps()); } -Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) { - IndexedColumnReader reader(_file_reader, _index_meta->page_zone_maps()); +Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory, + const ZoneMapIndexPB* index_meta) { + // TODO yyq: implement a new once flag to avoid status construct. 
+ return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] { + return _load(use_page_cache, kept_in_memory, index_meta); + }); +} + +Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory, + const ZoneMapIndexPB* index_meta) { + IndexedColumnReader reader(_file_reader, index_meta->page_zone_maps()); RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory)); IndexedColumnIterator iter(&reader); diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h index ed3418dc48..b6a6ae9075 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.h +++ b/be/src/olap/rowset/segment_v2/zone_map_index.h @@ -30,6 +30,7 @@ #include "io/fs/file_reader_writer_fwd.h" #include "olap/field.h" #include "runtime/define_primitive_type.h" +#include "util/once.h" #include "vec/common/arena.h" namespace doris { @@ -146,20 +147,23 @@ private: class ZoneMapIndexReader { public: - explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader, const ZoneMapIndexPB* index_meta) - : _file_reader(std::move(file_reader)), _index_meta(index_meta) {} + explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader) + : _file_reader(std::move(file_reader)) {} // load all page zone maps into memory - Status load(bool use_page_cache, bool kept_in_memory); + Status load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*); const std::vector<ZoneMapPB>& page_zone_maps() const { return _page_zone_maps; } int32_t num_pages() const { return _page_zone_maps.size(); } private: - io::FileReaderSPtr _file_reader; - const ZoneMapIndexPB* _index_meta; + Status _load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*); +private: + DorisCallOnce<Status> _load_once; + // TODO: yyq, we should remove file_reader from here. 
+ io::FileReaderSPtr _file_reader; std::vector<ZoneMapPB> _page_zone_maps; }; diff --git a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp index 2d79776c3b..2e803e4ff1 100644 --- a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp @@ -80,8 +80,8 @@ void get_bitmap_reader_iter(const std::string& file_name, const ColumnIndexMetaP BitmapIndexReader** reader, BitmapIndexIterator** iter) { io::FileReaderSPtr file_reader; ASSERT_EQ(io::global_local_filesystem()->open_file(file_name, &file_reader), Status::OK()); - *reader = new BitmapIndexReader(std::move(file_reader), &meta.bitmap_index()); - auto st = (*reader)->load(true, false); + *reader = new BitmapIndexReader(std::move(file_reader)); + auto st = (*reader)->load(true, false, &meta.bitmap_index()); EXPECT_TRUE(st.ok()); st = (*reader)->new_iterator(iter); diff --git a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp index d10c1dde83..a5024f4147 100644 --- a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp @@ -70,8 +70,8 @@ TEST_F(OrdinalPageIndexTest, normal) { io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - OrdinalIndexReader index(file_reader, &index_meta.ordinal_index(), 16 * 1024 * 4096 + 1); - EXPECT_TRUE(index.load(true, false).ok()); + OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1); + EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok()); EXPECT_EQ(16 * 1024, index.num_data_pages()); EXPECT_EQ(1, index.get_first_ordinal(0)); EXPECT_EQ(4096, index.get_last_ordinal(0)); @@ -124,8 +124,8 @@ TEST_F(OrdinalPageIndexTest, one_data_page) { EXPECT_EQ(data_page_pointer, root_page_pointer); } - OrdinalIndexReader index(nullptr, &index_meta.ordinal_index(), num_values); - 
EXPECT_TRUE(index.load(true, false).ok()); + OrdinalIndexReader index(nullptr, num_values); + EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok()); EXPECT_EQ(1, index.num_data_pages()); EXPECT_EQ(0, index.get_first_ordinal(0)); EXPECT_EQ(num_values - 1, index.get_last_ordinal(0)); diff --git a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp index 1d9345f908..22b69c5cae 100644 --- a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp @@ -81,8 +81,8 @@ public: io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - ZoneMapIndexReader column_zone_map(file_reader, &index_meta.zone_map_index()); - Status status = column_zone_map.load(true, false); + ZoneMapIndexReader column_zone_map(file_reader); + Status status = column_zone_map.load(true, false, &index_meta.zone_map_index()); EXPECT_TRUE(status.ok()); EXPECT_EQ(3, column_zone_map.num_pages()); const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps(); @@ -128,8 +128,8 @@ public: io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - ZoneMapIndexReader column_zone_map(file_reader, &index_meta.zone_map_index()); - Status status = column_zone_map.load(true, false); + ZoneMapIndexReader column_zone_map(file_reader); + Status status = column_zone_map.load(true, false, &index_meta.zone_map_index()); EXPECT_TRUE(status.ok()); EXPECT_EQ(1, column_zone_map.num_pages()); const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps(); @@ -181,8 +181,8 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) { io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - ZoneMapIndexReader column_zone_map(file_reader, &index_meta.zone_map_index()); - Status status = column_zone_map.load(true, false); + ZoneMapIndexReader column_zone_map(file_reader); + Status status = 
column_zone_map.load(true, false, &index_meta.zone_map_index()); EXPECT_TRUE(status.ok()); EXPECT_EQ(3, column_zone_map.num_pages()); const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
