This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9756be6bf0 [improvement](stream-load) use vector instead of skiplist
when insert dup keys (#18686)
9756be6bf0 is described below
commit 9756be6bf04f4fbf6234885c7cd5223350af07d4
Author: huanghaibin <[email protected]>
AuthorDate: Sun Apr 23 09:40:09 2023 +0800
[improvement](stream-load) use vector instead of skiplist when insert dup
keys (#18686)
---
be/src/olap/memtable.cpp | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 1b582b2064..b42d524ef5 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -214,8 +214,7 @@ void MemTable::_insert_one_row_from_block(RowInBlock*
row_in_block) {
_rows++;
bool overwritten = false;
if (_keys_type == KeysType::DUP_KEYS) {
- // TODO: dup keys only need sort opertaion. Rethink skiplist is the
beat way to sort columns?
- _vec_skip_list->Insert(row_in_block, &overwritten);
+ // For dup keys, row_in_block is already stored in the vector; it will be sorted at the flush stage.
DCHECK(!overwritten) << "Duplicate key model meet overwrite in
SkipList";
return;
}
@@ -266,11 +265,23 @@ void MemTable::_collect_vskiplist_results() {
VecTable::Iterator it(_vec_skip_list.get());
vectorized::Block in_block = _input_mutable_block.to_block();
if (_keys_type == KeysType::DUP_KEYS) {
+ vectorized::MutableBlock mutable_block =
+ vectorized::MutableBlock::build_mutable_block(&in_block);
+ _vec_row_comparator->set_block(&mutable_block);
+ std::sort(_row_in_blocks.begin(), _row_in_blocks.end(),
+ [this](const RowInBlock* l, const RowInBlock* r) -> bool {
+ auto value = (*(this->_vec_row_comparator))(l, r);
+ if (value == 0) {
+ return l->_row_pos > r->_row_pos;
+ } else {
+ return value < 0;
+ }
+ });
std::vector<int> row_pos_vec;
DCHECK(in_block.rows() <= std::numeric_limits<int>::max());
row_pos_vec.reserve(in_block.rows());
- for (it.SeekToFirst(); it.Valid(); it.Next()) {
- row_pos_vec.emplace_back(it.key()->_row_pos);
+ for (int i = 0; i < _row_in_blocks.size(); i++) {
+ row_pos_vec.emplace_back(_row_in_blocks[i]->_row_pos);
}
_output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
row_pos_vec.data() + in_block.rows());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]