This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-2.0-var
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0-var by this push:
     new 42cfd155203 [Improvement](inverted index) delay inverted index col 
read to reduce IO (#26080)
42cfd155203 is described below

commit 42cfd1552036291251e3060db26ddf472a61d8a5
Author: Kang <[email protected]>
AuthorDate: Mon Oct 30 20:37:24 2023 +0800

    [Improvement](inverted index) delay inverted index col read to reduce IO 
(#26080)
    
    [Improvement](inverted index) delay inverted index col read to reduce IO 
(#26080)
---
 be/src/olap/match_predicate.h                      |  2 +
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 67 +++++++++++++++++++++-
 .../sql/dup_inverted_index_q18_string_match.out    |  9 +++
 .../sql/dup_inverted_index_q18_string_match.sql    |  1 +
 4 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/match_predicate.h b/be/src/olap/match_predicate.h
index e3e2ca45bd5..4232469f4a6 100644
--- a/be/src/olap/match_predicate.h
+++ b/be/src/olap/match_predicate.h
@@ -48,6 +48,8 @@ public:
 
     virtual PredicateType type() const override;
 
+    const std::string& get_value() const { return _value; }
+
     //evaluate predicate on Bitmap
     virtual Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
                             roaring::Roaring* roaring) const override {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 75c8898ee03..b5e72b6586c 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -18,6 +18,7 @@
 #include "olap/rowset/segment_v2/segment_iterator.h"
 
 #include <assert.h>
+#include <gen_cpp/Exprs_types.h>
 #include <gen_cpp/Types_types.h>
 #include <gen_cpp/olap_file.pb.h>
 
@@ -27,6 +28,7 @@
 #include <numeric>
 #include <set>
 #include <utility>
+#include <vector>
 
 // IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
 #include "common/compiler_util.h" // IWYU pragma: keep
@@ -42,6 +44,7 @@
 #include "olap/field.h"
 #include "olap/iterators.h"
 #include "olap/like_column_predicate.h"
+#include "olap/match_predicate.h"
 #include "olap/olap_common.h"
 #include "olap/primary_key_index.h"
 #include "olap/rowset/segment_v2/bitmap_index_reader.h"
@@ -1040,6 +1043,65 @@ Status SegmentIterator::_apply_inverted_index() {
             }
         }
     }
+
+    // delete from _common_expr_ctxs_push_down if a MATCH predicate will be 
removed from _col_predicates
+    // since it's not necessary to eval it any more to avoid index miss, which 
is added in _normalize_predicate
+    for (auto pred : _col_predicates) {
+        if (pred->type() == PredicateType::MATCH &&
+            std::find(remaining_predicates.begin(), 
remaining_predicates.end(), pred) ==
+                    remaining_predicates.end()) {
+            // TODO: change dynamic_cast to static_cast in the future
+            MatchPredicate* match_pred = dynamic_cast<MatchPredicate*>(pred);
+            if (match_pred == nullptr) {
+                LOG(WARNING) << pred->debug_string() << " should be 
MatchPredicate";
+                continue;
+            }
+            for (auto it = _common_expr_ctxs_push_down.begin();
+                 it != _common_expr_ctxs_push_down.end(); it++) {
+                auto expr = (*it)->root().get();
+                // check expr type and child is the same as match predicate
+                if (expr->node_type() == TExprNodeType::MATCH_PRED &&
+                    expr->children().size() == 2 && 
expr->get_child(0)->is_slot_ref() &&
+                    expr->get_child(1)->is_constant()) {
+                    // TODO: change dynamic_cast to static_cast in the future
+                    auto slot_ref = 
dynamic_cast<vectorized::VSlotRef*>(expr->get_child(0).get());
+                    if (slot_ref == nullptr) {
+                        LOG(WARNING) << expr->get_child(0)->debug_string() << 
" should be SlotRef";
+                        continue;
+                    }
+                    std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
+                    auto res = expr->get_child(1)->get_const_col((*it).get(), 
&const_col_wrapper);
+                    if (res.ok() && const_col_wrapper) {
+                        const auto const_column = 
check_and_get_column<vectorized::ColumnConst>(
+                                const_col_wrapper->column_ptr);
+                        if (const_column) {
+                            // check column id and predicate value is the same
+                            if ((match_pred->column_id() ==
+                                 _schema->column_id(slot_ref->column_id())) &&
+                                (StringRef(match_pred->get_value()) ==
+                                 const_column->get_data_at(0))) {
+                                // delete the expr from 
_remaining_conjunct_roots and _common_expr_ctxs_push_down
+                                for (auto it1 = 
_remaining_conjunct_roots.begin();
+                                     it1 != _remaining_conjunct_roots.end(); 
it1++) {
+                                    if (it1->get() == expr) {
+                                        VLOG_DEBUG << "delete expr from 
_remaining_conjunct_roots "
+                                                   << expr->debug_string();
+                                        _remaining_conjunct_roots.erase(it1);
+                                        break;
+                                    }
+                                }
+                                VLOG_DEBUG << "delete expr from 
_common_expr_ctxs_push_down "
+                                           << expr->debug_string();
+                                _common_expr_ctxs_push_down.erase(it);
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     _col_predicates = std::move(remaining_predicates);
     _opts.stats->rows_inverted_index_filtered += (input_rows - 
_row_bitmap.cardinality());
     return Status::OK();
@@ -1109,8 +1171,9 @@ Status SegmentIterator::_init_inverted_index_iterators() {
         int32_t unique_id = _opts.tablet_schema->column(cid).unique_id();
         if (_inverted_index_iterators.count(unique_id) < 1) {
             RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
-                    _opts.tablet_schema->column(cid), 
_opts.tablet_schema->get_inverted_index(cid),
-                    _opts, &_inverted_index_iterators[unique_id]));
+                    _opts.tablet_schema->column(cid),
+                    _opts.tablet_schema->get_inverted_index(unique_id), _opts,
+                    &_inverted_index_iterators[unique_id]));
         }
     }
     return Status::OK();
diff --git 
a/regression-test/data/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.out
 
b/regression-test/data/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.out
new file mode 100644
index 00000000000..205c6ff5bb5
--- /dev/null
+++ 
b/regression-test/data/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !dup_inverted_index_q18_string_match --
+-1916662625    false   78      -13460  21130   -2482   -1844741088     
29340.85        -1.472252906087688E9    73541395097702267.459   
30444229593217770.284   2022-01-14      2022-12-07T04:09:46     2022-06-25      
2022-01-11T04:05:07     32.142.250.139  [email protected]     Basil 
Place 15
+-1839832100    true    47      -22682  13555   10540   -1114845607     
20315.459       1.897044291188191E9     59057985418597209.391   
42382795220020608.641   2022-09-25      2022-10-20T11:17:05     2022-04-03      
2022-04-05T19:45:23     23.131.157.163  [email protected]     Nobel Road 97
+-1787254128    false   -86     5929    8007    91108   1646790590      
-12748.728      -1.61835257126454E8     44884498199065915.707   
90723335017080367.491   2022-09-12      2022-01-27T15:18:03     2022-02-28      
2022-07-12T22:09:36     100.38.193.37   [email protected]     Graedel Hill 56
+-1662034455    true    -60     -31209  13551   18569   -7634871        
6548.2676       -4.69704747046188E8     88115416211561753.125   
67337349559631643.630   2022-06-24      2022-09-22T16:14:22     2022-08-05      
2022-02-20T14:00:24     87.137.14.156   [email protected]        Ridgeway Hill 34
+-1024780352    true    -22     -8718   -31789  91547   1201680092      
8814.243        -1.81266387539064E9     32726063178438464.987   
55896571700759806.153   2022-11-07      2022-04-13T01:24:53     2022-05-25      
2022-11-15T00:40:51     130.140.209.209 [email protected] Manley Pass 57
+-769400722     true    -30     21454   27632   -64427  -1353560943     
26450.154       1.823927555911853E9     64469528968198612.444   
27416791107112909.849   2022-10-25      2022-06-15T23:07:05     2022-01-21      
2022-07-04T16:30:44     207.143.171.175 [email protected]        
Parkside Place 27
+
diff --git 
a/regression-test/suites/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.sql
 
b/regression-test/suites/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.sql
new file mode 100644
index 00000000000..78d38225148
--- /dev/null
+++ 
b/regression-test/suites/datatype_p0/scalar_types/sql/dup_inverted_index_q18_string_match.sql
@@ -0,0 +1 @@
+SELECT * FROM tbl_scalar_types_dup_inverted_index WHERE c_string MATCH 'part 
way road point hill trail crossing street court pass drive place circle plaza 
Junction Junction Center Avenue Alley Lane' AND abs(c_bigint) < 100000 ORDER BY 
k1;
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to