This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6a1c7fac9d [enhancement](load) shrink reserved buffer for page builder
(#14012) (#14014)
6a1c7fac9d is described below
commit 6a1c7fac9dfcd7ee094133737f8fb2cc65d50f1c
Author: zhengyu <[email protected]>
AuthorDate: Wed Nov 9 08:40:07 2022 +0800
[enhancement](load) shrink reserved buffer for page builder (#14012)
(#14014)
* [enhancement](load) shrink reserved buffer for page builder (#14012)
For a table with hundreds of text-type columns, flushing its memtable may
consume a huge amount of memory.
This memory is consumed when initializing the page builder, as it reserves
1MB for each column.
So memory consumption grows in proportion to the number of columns. Shrinking
the reservation may
substantially reduce memory usage during the load process.
Signed-off-by: freemandealer <[email protected]>
* response to the review
Signed-off-by: freemandealer <[email protected]>
* Update binary_plain_page.h
* Update binary_dict_page.cpp
* Update binary_plain_page.h
Signed-off-by: freemandealer <[email protected]>
---
be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 8 +++++++-
be/src/olap/rowset/segment_v2/binary_plain_page.h | 14 +++++++++++---
be/src/olap/rowset/segment_v2/options.h | 2 ++
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index b8d3fa98c6..f86c16f134 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -37,7 +37,9 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const
PageBuilderOptions& options)
// TODO: the data page builder type can be created by Factory according to
user config
_data_page_builder.reset(new
BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options));
PageBuilderOptions dict_builder_options;
- dict_builder_options.data_page_size = _options.dict_page_size;
+ dict_builder_options.data_page_size =
+ std::min(_options.data_page_size, _options.dict_page_size);
+ dict_builder_options.is_dict_page = true;
_dict_builder.reset(new
BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
reset();
}
@@ -118,6 +120,10 @@ Status BinaryDictPageBuilder::add(const uint8_t* vals,
size_t* count) {
}
OwnedSlice BinaryDictPageBuilder::finish() {
+ if (VLOG_DEBUG_IS_ON && _encoding_type == DICT_ENCODING) {
+ VLOG_DEBUG << "dict page size:" << _dict_builder->size();
+ }
+
DCHECK(!_finished);
_finished = true;
diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h
b/be/src/olap/rowset/segment_v2/binary_plain_page.h
index 96cfc392a5..9faaeab3eb 100644
--- a/be/src/olap/rowset/segment_v2/binary_plain_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h
@@ -53,8 +53,14 @@ public:
}
bool is_page_full() override {
- // data_page_size is 0, do not limit the page size
- return _options.data_page_size != 0 && _size_estimate >
_options.data_page_size;
+ bool ret = false;
+ if (_options.is_dict_page) {
+ // dict_page_size is 0, do not limit the page size
+ ret = _options.dict_page_size != 0 && _size_estimate >
_options.dict_page_size;
+ } else {
+ ret = _options.data_page_size != 0 && _size_estimate >
_options.data_page_size;
+ }
+ return ret;
}
Status add(const uint8_t* vals, size_t* count) override {
@@ -104,7 +110,9 @@ public:
void reset() override {
_offsets.clear();
_buffer.clear();
- _buffer.reserve(_options.data_page_size == 0 ? 1024 :
_options.data_page_size);
+ _buffer.reserve(_options.data_page_size == 0
+ ? 1024
+ : std::min(_options.data_page_size,
_options.dict_page_size));
_size_estimate = sizeof(uint32_t);
_finished = false;
_last_value_size = 0;
diff --git a/be/src/olap/rowset/segment_v2/options.h
b/be/src/olap/rowset/segment_v2/options.h
index 9405eb19cf..19041f4c51 100644
--- a/be/src/olap/rowset/segment_v2/options.h
+++ b/be/src/olap/rowset/segment_v2/options.h
@@ -30,6 +30,8 @@ struct PageBuilderOptions {
size_t dict_page_size = DEFAULT_PAGE_SIZE;
bool need_check_bitmap = true;
+
+ bool is_dict_page = false; // page used for saving dictionary
};
struct PageDecoderOptions {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]