This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-2.0-var
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0-var by this push:
new 42cfd155203 [Improvement](inverted index) delay inverted index col
read to reduce IO (#26080)
42cfd155203 is described below
commit 42cfd1552036291251e3060db26ddf472a61d8a5
Author: Kang <[email protected]>
AuthorDate: Mon Oct 30 20:37:24 2023 +0800
[Improvement](inverted index) delay inverted index col read to reduce IO
(#26080)
[Improvement](inverted index) delay inverted index col read to reduce IO
(#26080)
---
be/src/olap/match_predicate.h | 2 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 67 +++++++++++++++++++++-
.../sql/dup_inverted_index_q18_string_match.out | 9 +++
.../sql/dup_inverted_index_q18_string_match.sql | 1 +
4 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/match_predicate.h b/be/src/olap/match_predicate.h
index e3e2ca45bd5..4232469f4a6 100644
--- a/be/src/olap/match_predicate.h
+++ b/be/src/olap/match_predicate.h
@@ -48,6 +48,8 @@ public:
virtual PredicateType type() const override;
+ const std::string& get_value() const { return _value; }
+
//evaluate predicate on Bitmap
virtual Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
roaring::Roaring* roaring) const override {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 75c8898ee03..b5e72b6586c 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -18,6 +18,7 @@
#include "olap/rowset/segment_v2/segment_iterator.h"
#include <assert.h>
+#include <gen_cpp/Exprs_types.h>
#include <gen_cpp/Types_types.h>
#include <gen_cpp/olap_file.pb.h>
@@ -27,6 +28,7 @@
#include <numeric>
#include <set>
#include <utility>
+#include <vector>
// IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
#include "common/compiler_util.h" // IWYU pragma: keep
@@ -42,6 +44,7 @@
#include "olap/field.h"
#include "olap/iterators.h"
#include "olap/like_column_predicate.h"
+#include "olap/match_predicate.h"
#include "olap/olap_common.h"
#include "olap/primary_key_index.h"
#include "olap/rowset/segment_v2/bitmap_index_reader.h"
@@ -1040,6 +1043,65 @@ Status SegmentIterator::_apply_inverted_index() {
}
}
}
+
+ // delete the expr from _common_expr_ctxs_push_down if its MATCH predicate will be
removed from _col_predicates,
+ // since that expr (added in _normalize_predicate to avoid index miss) no longer
needs to be evaluated
+ for (auto pred : _col_predicates) {
+ if (pred->type() == PredicateType::MATCH &&
+ std::find(remaining_predicates.begin(),
remaining_predicates.end(), pred) ==
+ remaining_predicates.end()) {
+ // TODO: change dynamic_cast to static_cast in the future
+ MatchPredicate* match_pred = dynamic_cast<MatchPredicate*>(pred);
+ if (match_pred == nullptr) {
+ LOG(WARNING) << pred->debug_string() << " should be
MatchPredicate";
+ continue;
+ }
+ for (auto it = _common_expr_ctxs_push_down.begin();
+ it != _common_expr_ctxs_push_down.end(); it++) {
+ auto expr = (*it)->root().get();
+ // check expr type and child is the same as match predicate
+ if (expr->node_type() == TExprNodeType::MATCH_PRED &&
+ expr->children().size() == 2 &&
expr->get_child(0)->is_slot_ref() &&
+ expr->get_child(1)->is_constant()) {
+ // TODO: change dynamic_cast to static_cast in the future
+ auto slot_ref =
dynamic_cast<vectorized::VSlotRef*>(expr->get_child(0).get());
+ if (slot_ref == nullptr) {
+ LOG(WARNING) << expr->get_child(0)->debug_string() <<
" should be SlotRef";
+ continue;
+ }
+ std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
+ auto res = expr->get_child(1)->get_const_col((*it).get(),
&const_col_wrapper);
+ if (res.ok() && const_col_wrapper) {
+ const auto const_column =
check_and_get_column<vectorized::ColumnConst>(
+ const_col_wrapper->column_ptr);
+ if (const_column) {
+ // check column id and predicate value is the same
+ if ((match_pred->column_id() ==
+ _schema->column_id(slot_ref->column_id())) &&
+ (StringRef(match_pred->get_value()) ==
+ const_column->get_data_at(0))) {
+ // delete the expr from
_remaining_conjunct_roots and _common_expr_ctxs_push_down
+ for (auto it1 =
_remaining_conjunct_roots.begin();
+ it1 != _remaining_conjunct_roots.end();
it1++) {
+ if (it1->get() == expr) {
+ VLOG_DEBUG << "delete expr from
_remaining_conjunct_roots "
+ << expr->debug_string();
+ _remaining_conjunct_roots.erase(it1);
+ break;
+ }
+ }
+ VLOG_DEBUG << "delete expr from
_common_expr_ctxs_push_down "
+ << expr->debug_string();
+ _common_expr_ctxs_push_down.erase(it);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
_col_predicates = std::move(remaining_predicates);
_opts.stats->rows_inverted_index_filtered += (input_rows -
_row_bitmap.cardinality());
return Status::OK();
@@ -1109,8 +1171,9 @@ Status SegmentIterator::_init_inverted_index_iterators() {
int32_t unique_id = _opts.tablet_schema->column(cid).unique_id();
if (_inverted_index_iterators.count(unique_id) < 1) {
RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
- _opts.tablet_schema->column(cid),
_opts.tablet_schema->get_inverted_index(cid),
- _opts, &_inverted_index_iterators[unique_id]));
+ _opts.tablet_schema->column(cid),
+ _opts.tablet_schema->get_inverted_index(unique_id), _opts,
+ &_inverted_index_iterators[unique_id]));
}
}
return Status::OK();
diff --git
a/regression-test/data/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.out
b/regression-test/data/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.out
new file mode 100644
index 00000000000..205c6ff5bb5
--- /dev/null
+++
b/regression-test/data/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !dup_inverted_index_q18_string_match --
+-1916662625 false 78 -13460 21130 -2482 -1844741088
29340.85 -1.472252906087688E9 73541395097702267.459
30444229593217770.284 2022-01-14 2022-12-07T04:09:46 2022-06-25
2022-01-11T04:05:07 32.142.250.139 [email protected] Basil
Place 15
+-1839832100 true 47 -22682 13555 10540 -1114845607
20315.459 1.897044291188191E9 59057985418597209.391
42382795220020608.641 2022-09-25 2022-10-20T11:17:05 2022-04-03
2022-04-05T19:45:23 23.131.157.163 [email protected] Nobel Road 97
+-1787254128 false -86 5929 8007 91108 1646790590
-12748.728 -1.61835257126454E8 44884498199065915.707
90723335017080367.491 2022-09-12 2022-01-27T15:18:03 2022-02-28
2022-07-12T22:09:36 100.38.193.37 [email protected] Graedel Hill 56
+-1662034455 true -60 -31209 13551 18569 -7634871
6548.2676 -4.69704747046188E8 88115416211561753.125
67337349559631643.630 2022-06-24 2022-09-22T16:14:22 2022-08-05
2022-02-20T14:00:24 87.137.14.156 [email protected] Ridgeway Hill 34
+-1024780352 true -22 -8718 -31789 91547 1201680092
8814.243 -1.81266387539064E9 32726063178438464.987
55896571700759806.153 2022-11-07 2022-04-13T01:24:53 2022-05-25
2022-11-15T00:40:51 130.140.209.209 [email protected] Manley Pass 57
+-769400722 true -30 21454 27632 -64427 -1353560943
26450.154 1.823927555911853E9 64469528968198612.444
27416791107112909.849 2022-10-25 2022-06-15T23:07:05 2022-01-21
2022-07-04T16:30:44 207.143.171.175 [email protected]
Parkside Place 27
+
diff --git
a/regression-test/suites/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.sql
b/regression-test/suites/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.sql
new file mode 100644
index 00000000000..78d38225148
--- /dev/null
+++
b/regression-test/suites/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.sql
@@ -0,0 +1 @@
+SELECT * FROM tbl_scalar_types_dup_inverted_index WHERE c_string MATCH 'part
way road point hill trail crossing street court pass drive place circle plaza
Junction Junction Center Avenue Alley Lane' AND abs(c_bigint) < 100000 ORDER BY
k1;
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]