This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 12ed2951c47 [fix] (inverted index) remove tmp columns in block
(#39369) (#39533)
12ed2951c47 is described below
commit 12ed2951c47e26b462b28a5de5f235dd4d6cbb24
Author: Sun Chenyang <[email protected]>
AuthorDate: Tue Aug 20 20:53:23 2024 +0800
[fix] (inverted index) remove tmp columns in block (#39369) (#39533)
---
be/src/pipeline/exec/scan_operator.cpp | 7 +------
be/src/vec/core/block.cpp | 9 +++++++++
be/src/vec/core/block.h | 5 +++++
be/src/vec/exec/scan/vscanner.cpp | 11 +++--------
be/src/vec/olap/vcollect_iterator.cpp | 19 +++++--------------
.../suites/inverted_index_p0/topn_clear_block.groovy | 10 ++++++++++
6 files changed, 33 insertions(+), 28 deletions(-)
diff --git a/be/src/pipeline/exec/scan_operator.cpp
b/be/src/pipeline/exec/scan_operator.cpp
index 39a57bee25b..d88b778b45c 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -1486,12 +1486,7 @@ Status
ScanOperatorX<LocalStateType>::get_block(RuntimeState* state, vectorized:
// remove them when query leave scan node to avoid other nodes use
block->columns() to make a wrong decision
Defer drop_block_temp_column {[&]() {
std::unique_lock l(local_state._block_lock);
- auto all_column_names = block->get_names();
- for (auto& name : all_column_names) {
- if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
- block->erase(name);
- }
- }
+ block->erase_tmp_columns();
}};
if (state->is_cancelled()) {
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 5124ff248e9..29c6d21bb78 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -738,6 +738,15 @@ void Block::clear_column_data(int column_size) noexcept {
row_same_bit.clear();
}
+void Block::erase_tmp_columns() noexcept {
+ auto all_column_names = get_names();
+ for (auto& name : all_column_names) {
+ if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
+ erase(name);
+ }
+ }
+}
+
void Block::swap(Block& other) noexcept {
SCOPED_SKIP_MEMORY_CHECK();
data.swap(other.data);
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 38eb495296f..a8a4d07b49d 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -392,6 +392,11 @@ public:
// for debug purpose.
std::string print_use_count();
+ // Remove temporary columns from the block.
+ // The inverted index apply logic adds temporary columns (names starting with
+ // BeConsts::BLOCK_TEMP_COLUMN_PREFIX) to the block to optimize query performance.
+ void erase_tmp_columns() noexcept;
+
private:
void erase_impl(size_t position);
};
diff --git a/be/src/vec/exec/scan/vscanner.cpp
b/be/src/vec/exec/scan/vscanner.cpp
index 79fa4019687..d8ae052e185 100644
--- a/be/src/vec/exec/scan/vscanner.cpp
+++ b/be/src/vec/exec/scan/vscanner.cpp
@@ -132,6 +132,8 @@ Status VScanner::get_block(RuntimeState* state, Block*
block, bool* eof) {
RETURN_IF_ERROR(_get_block_impl(state, block, eof));
if (*eof) {
DCHECK(block->rows() == 0);
+ // clear TEMP columns to avoid column align problem
+ block->erase_tmp_columns();
break;
}
_num_rows_read += block->rows();
@@ -167,14 +169,7 @@ Status VScanner::get_block(RuntimeState* state, Block*
block, bool* eof) {
}
Status VScanner::_filter_output_block(Block* block) {
- Defer clear_tmp_block([&]() {
- auto all_column_names = block->get_names();
- for (auto& name : all_column_names) {
- if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
- block->erase(name);
- }
- }
- });
+ Defer clear_tmp_block([&]() { block->erase_tmp_columns(); });
if (block->has(BeConsts::BLOCK_TEMP_COLUMN_SCANNER_FILTERED)) {
// scanner filter_block is already done (only by _topn_next
currently), just skip it
return Status::OK();
diff --git a/be/src/vec/olap/vcollect_iterator.cpp
b/be/src/vec/olap/vcollect_iterator.cpp
index 10ac1e236b2..80a23925961 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -257,18 +257,7 @@ Status VCollectIterator::_topn_next(Block* block) {
}
// clear TEMP columns to avoid column align problem
- auto clear_temp_columns = [](Block* block) {
- auto all_column_names = block->get_names();
- for (auto& name : all_column_names) {
- if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
- // clear TEMP columns from block to prevent from storage
engine merge with this
- // fake column
- block->erase(name);
- }
- }
- };
-
- clear_temp_columns(block);
+ block->erase_tmp_columns();
auto clone_block = block->clone_empty();
MutableBlock mutable_block =
vectorized::MutableBlock::build_mutable_block(&clone_block);
@@ -305,7 +294,7 @@ Status VCollectIterator::_topn_next(Block* block) {
eof = true;
if (block->rows() == 0) {
// clear TEMP columns to avoid column align problem in
segment iterator
- clear_temp_columns(block);
+ block->erase_tmp_columns();
break;
}
} else {
@@ -317,7 +306,7 @@ Status VCollectIterator::_topn_next(Block* block) {
RETURN_IF_ERROR(VExprContext::filter_block(
_reader->_reader_context.filter_block_conjuncts, block,
block->columns()));
// clear TEMP columns to avoid column align problem in
mutable_block.add_rows below
- clear_temp_columns(block);
+ block->erase_tmp_columns();
// update read rows
read_rows += block->rows();
@@ -864,6 +853,8 @@ Status
VCollectIterator::Level1Iterator::_normal_next(Block* block) {
if (!_children.empty()) {
_cur_child = std::move(*(_children.begin()));
_children.pop_front();
+ // clear TEMP columns to avoid column align problem
+ block->erase_tmp_columns();
return _normal_next(block);
} else {
_cur_child.reset();
diff --git a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
index 7486a658d60..586173e0f3e 100644
--- a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
+++ b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
@@ -78,7 +78,17 @@ suite("test_clear_block") {
sql """ delete from dup_httplogs where size = 24736; """
sql """ delete from dup_httplogs where request = 'GET /images/hm_bg.jpg
HTTP/1.0'; """
+ sql """ set enable_match_without_inverted_index = false """
sql """ sync """
qt_sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN
(NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 2 """
+
+ def result1 = sql """ SELECT clientip from ${dupTableName} WHERE clientip
NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 5000 """
+ def result2 = sql """ SELECT clientip from ${dupTableName} WHERE clientip
NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 5000 """
+ if (result1 != result2) {
+ logger.info("result1 is: {}", result1)
+ logger.info("result2 is: {}", result2)
+ assertTrue(false)
+ }
+
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]