This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 171404228f [improvement](vertical compaction) cache segment in
vertical compaction (#16101)
171404228f is described below
commit 171404228f7b80d161c2aad6a3224234512c3c5a
Author: yixiutt <[email protected]>
AuthorDate: Fri Jan 20 16:38:23 2023 +0800
[improvement](vertical compaction) cache segment in vertical compaction
(#16101)
1. In vertical compaction, segments are loaded for every column group, so
we should cache the segment pointers to avoid too much repeated I/O.
2. Fix the vertical compaction data size bug.
---
be/src/olap/rowset/beta_rowset_reader.cpp | 10 ++++++----
be/src/olap/rowset/beta_rowset_reader.h | 3 ++-
be/src/olap/rowset/rowset_reader.h | 3 ++-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 3 +++
be/src/vec/olap/vertical_block_reader.cpp | 4 +++-
5 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index 6612fff758..3b45b3cf3f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -50,7 +50,8 @@ bool BetaRowsetReader::update_profile(RuntimeProfile*
profile) {
}
Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext*
read_context,
- std::vector<RowwiseIterator*>*
out_iters) {
+ std::vector<RowwiseIterator*>*
out_iters,
+ bool use_cache) {
RETURN_NOT_OK(_rowset->load());
_context = read_context;
if (_context->stats != nullptr) {
@@ -163,9 +164,10 @@ Status
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.runtime_state = read_context->runtime_state;
// load segments
- RETURN_NOT_OK(SegmentLoader::instance()->load_segments(
- _rowset, &_segment_cache_handle,
- read_context->reader_type == ReaderType::READER_QUERY));
+ // use_cache is true when doing vertical compaction
+ bool should_use_cache = use_cache || read_context->reader_type ==
ReaderType::READER_QUERY;
+ RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset,
&_segment_cache_handle,
+ should_use_cache));
// create iterator for each segment
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h
b/be/src/olap/rowset/beta_rowset_reader.h
index 889a7c2742..dea6814558 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -34,7 +34,8 @@ public:
Status init(RowsetReaderContext* read_context) override;
Status get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>* out_iters)
override;
+ std::vector<RowwiseIterator*>* out_iters,
+ bool use_cache = false) override;
void reset_read_options() override;
Status next_block(vectorized::Block* block) override;
Status next_block_view(vectorized::BlockView* block_view) override;
diff --git a/be/src/olap/rowset/rowset_reader.h
b/be/src/olap/rowset/rowset_reader.h
index d1601d379a..4186088fe5 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -44,7 +44,8 @@ public:
virtual Status init(RowsetReaderContext* read_context) = 0;
virtual Status get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>*
out_iters) = 0;
+ std::vector<RowwiseIterator*>*
out_iters,
+ bool use_cache = false) = 0;
virtual void reset_read_options() = 0;
virtual Status next_block(vectorized::Block* block) = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index ec62749c5c..c3e066db72 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -433,6 +433,9 @@ Status SegmentWriter::finalize_columns_index(uint64_t*
index_size) {
Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
RETURN_IF_ERROR(_write_footer());
+ // finish
+ RETURN_IF_ERROR(_file_writer->finalize());
+ *segment_file_size = _file_writer->bytes_appended();
return Status::OK();
}
diff --git a/be/src/vec/olap/vertical_block_reader.cpp
b/be/src/vec/olap/vertical_block_reader.cpp
index dc7d3b58c6..897d001c0d 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -55,7 +55,9 @@ Status VerticalBlockReader::_get_segment_iterators(const
ReaderParams& read_para
_reader_context.is_vertical_compaction = true;
for (auto& rs_reader : rs_readers) {
// segment iterator will be inited here
- RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context,
segment_iters));
+ // In vertical compaction, every column group loads the segments, so we should
cache
+ // the segments to avoid too many S3 HEAD requests
+ RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context,
segment_iters, true));
// if segments overlapping, all segment iterator should be inited in
// heap merge iterator. If segments are none overlapping, only first
segment of this
// rowset will be inited and push to heap, other segment will be
inited later when current
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]