This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 5e1e725cee8 [feature](inverted index) Add multi_match function #37722
#38931 #39149 (#38877)
5e1e725cee8 is described below
commit 5e1e725cee8fd3d0c3ddecd9a7ea01ec9199297f
Author: zzzxl <[email protected]>
AuthorDate: Sat Aug 10 15:20:08 2024 +0800
[feature](inverted index) Add multi_match function #37722 #38931 #39149
(#38877)
---
.../olap/rowset/segment_v2/inverted_index_reader.h | 4 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 174 +++++++++++++++---
be/src/olap/rowset/segment_v2/segment_iterator.h | 33 ++++
be/src/vec/exprs/vectorized_fn_call.cpp | 44 ++++-
be/src/vec/exprs/vectorized_fn_call.h | 8 +-
be/src/vec/exprs/vexpr.cpp | 37 ++--
be/src/vec/exprs/vexpr.h | 16 +-
be/src/vec/exprs/vin_predicate.cpp | 4 +-
be/src/vec/exprs/vin_predicate.h | 3 +-
be/src/vec/exprs/vliteral.cpp | 21 +++
be/src/vec/exprs/vliteral.h | 2 +
be/src/vec/exprs/vslot_ref.cpp | 17 ++
be/src/vec/exprs/vslot_ref.h | 2 +
be/src/vec/functions/function.h | 28 ++-
be/src/vec/functions/function_multi_match.cpp | 194 +++++++++++++++++++++
be/src/vec/functions/function_multi_match.h | 70 ++++++++
be/src/vec/functions/simple_function_factory.h | 4 +-
.../doris/catalog/BuiltinScalarFunctions.java | 5 +-
.../expressions/functions/scalar/MultiMatch.java | 73 ++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
gensrc/script/doris_builtins_functions.py | 5 +
.../inverted_index_p0/test_index_multi_match.out | 49 ++++++
.../test_index_multi_match.groovy | 129 ++++++++++++++
23 files changed, 863 insertions(+), 64 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index af3642c7c90..cd5d89d4916 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -72,7 +72,6 @@ class InvertedIndexIterator;
class InvertedIndexQueryCacheHandle;
class InvertedIndexFileReader;
struct InvertedIndexQueryInfo;
-
class InvertedIndexReader : public
std::enable_shared_from_this<InvertedIndexReader> {
public:
explicit InvertedIndexReader(
@@ -153,6 +152,7 @@ protected:
TabletIndex _index_meta;
bool _has_null = true;
};
+using InvertedIndexReaderPtr = std::shared_ptr<InvertedIndexReader>;
class FullTextIndexReader : public InvertedIndexReader {
ENABLE_FACTORY_CREATOR(FullTextIndexReader);
@@ -306,6 +306,8 @@ public:
[[nodiscard]] const std::map<string, string>& get_index_properties() const;
[[nodiscard]] bool has_null() { return _reader->has_null(); };
+ const InvertedIndexReaderPtr& reader() { return _reader; }
+
private:
OlapReaderStatistics* _stats = nullptr;
RuntimeState* _runtime_state = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 90653e1d577..9e1133f4620 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -314,6 +314,7 @@ Status SegmentIterator::_init_impl(const
StorageReadOptions& opts) {
for (auto& expr : _remaining_conjunct_roots) {
_calculate_pred_in_remaining_conjunct_root(expr);
}
+ _calculate_func_in_remaining_conjunct_root();
_column_predicate_info.reset(new ColumnPredicateInfo());
if (_schema->rowid_col_idx() > 0) {
@@ -558,6 +559,8 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
++it;
}
}
+ _col_preds_except_leafnode_of_andnode.clear();
+ compound_func_exprs.clear();
// 1. if all conditions in the compound hit the inverted index and
there are no other expr to handle.
// 2. then there is no need to generate index_result_column.
if (_enable_common_expr_pushdown &&
_remaining_conjunct_roots.empty()) {
@@ -807,25 +810,32 @@ Status
SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
auto v_literal_expr =
std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
_column_predicate_info->query_values.insert(v_literal_expr->value());
} else if (node_type == TExprNodeType::BINARY_PRED || node_type ==
TExprNodeType::MATCH_PRED ||
- node_type == TExprNodeType::IN_PRED) {
- if (node_type == TExprNodeType::MATCH_PRED) {
- _column_predicate_info->query_op = "match";
- } else if (node_type == TExprNodeType::IN_PRED) {
- if (expr->op() == TExprOpcode::type::FILTER_IN) {
- _column_predicate_info->query_op = "in";
+ node_type == TExprNodeType::IN_PRED || node_type ==
TExprNodeType::FUNCTION_CALL) {
+ std::string result_sign;
+ if (node_type == TExprNodeType::FUNCTION_CALL) {
+ result_sign =
+ BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
+ } else {
+ if (node_type == TExprNodeType::MATCH_PRED) {
+ _column_predicate_info->query_op = "match";
+ } else if (node_type == TExprNodeType::IN_PRED) {
+ if (expr->op() == TExprOpcode::type::FILTER_IN) {
+ _column_predicate_info->query_op = "in";
+ } else {
+ _column_predicate_info->query_op = "not_in";
+ }
} else {
- _column_predicate_info->query_op = "not_in";
+ _column_predicate_info->query_op =
expr->fn().name.function_name;
}
- } else {
- _column_predicate_info->query_op = expr->fn().name.function_name;
+ result_sign =
_gen_predicate_result_sign(_column_predicate_info.get());
}
+
// get child condition result in compound conditions
- auto pred_result_sign =
_gen_predicate_result_sign(_column_predicate_info.get());
_column_predicate_info.reset(new ColumnPredicateInfo());
- VLOG_DEBUG << "_gen_predicate_result_sign " << pred_result_sign;
- if (_rowid_result_for_index.count(pred_result_sign) > 0 &&
- _rowid_result_for_index[pred_result_sign].first) {
- auto apply_result =
_rowid_result_for_index[pred_result_sign].second;
+ VLOG_DEBUG << "result_sign " << result_sign;
+ if (_rowid_result_for_index.count(result_sign) > 0 &&
+ _rowid_result_for_index[result_sign].first) {
+ auto apply_result = _rowid_result_for_index[result_sign].second;
_pred_except_leafnode_of_andnode_evaluate_result.push_back(apply_result);
}
} else if (node_type == TExprNodeType::COMPOUND_PRED) {
@@ -869,7 +879,7 @@ Status SegmentIterator::_execute_compound_fn(const
std::string& function_name) {
bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
// no compound predicates push down, so no need to filter
- if (_col_preds_except_leafnode_of_andnode.size() == 0) {
+ if (_col_preds_except_leafnode_of_andnode.empty() &&
compound_func_exprs.empty()) {
return false;
}
for (auto pred : _col_preds_except_leafnode_of_andnode) {
@@ -883,6 +893,14 @@ bool
SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
return false;
}
}
+ for (const auto& func_expr_pair : compound_func_exprs) {
+ const auto& expr = func_expr_pair.first;
+ std::string pred_result_sign =
+ BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
+ if (!_rowid_result_for_index.contains(pred_result_sign)) {
+ return false;
+ }
+ }
return true;
}
@@ -994,6 +1012,16 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
}
}
+ for (const auto& func_expr_pair : compound_func_exprs) {
+ const auto& expr = func_expr_pair.first;
+ const auto& expr_ctx = func_expr_pair.second;
+ auto result = std::make_shared<roaring::Roaring>();
+ RETURN_IF_ERROR(execute_func_expr(expr, expr_ctx, result));
+ std::string result_sign =
+ BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
+ _rowid_result_for_index.emplace(result_sign, std::make_pair(true,
std::move(*result)));
+ }
+
return Status::OK();
}
@@ -1269,18 +1297,6 @@ Status SegmentIterator::_apply_inverted_index() {
std::vector<ColumnPredicate*> remaining_predicates;
std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set;
- // TODO:Comment out this code before introducing range query functionality
- /*for (const auto& entry : _opts.col_id_to_predicates) {
- ColumnId column_id = entry.first;
- auto pred = entry.second;
- bool continue_apply = true;
- RETURN_IF_ERROR(_apply_inverted_index_on_block_column_predicate(
- column_id, pred.get(), no_need_to_pass_column_predicate_set,
&continue_apply));
- if (!continue_apply) {
- break;
- }
- }*/
-
for (auto pred : _col_predicates) {
if (no_need_to_pass_column_predicate_set.count(pred) > 0) {
continue;
@@ -1316,6 +1332,23 @@ Status SegmentIterator::_apply_inverted_index() {
}
}
+ for (const auto& func_expr_pair : no_compound_func_exprs) {
+ const auto& expr = func_expr_pair.first;
+ const auto& expr_ctx = func_expr_pair.second;
+ auto result = std::make_shared<roaring::Roaring>();
+ RETURN_IF_ERROR(execute_func_expr(expr, expr_ctx, result));
+ _row_bitmap &= *result;
+ for (auto it = _remaining_conjunct_roots.begin(); it !=
_remaining_conjunct_roots.end();) {
+ if (*it == expr) {
+ std::erase_if(_common_expr_ctxs_push_down,
+ [&it](const auto& iter) { return iter->root() ==
*it; });
+ it = _remaining_conjunct_roots.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
+
_col_predicates = std::move(remaining_predicates);
_opts.stats->rows_inverted_index_filtered += (input_rows -
_row_bitmap.cardinality());
return Status::OK();
@@ -1392,6 +1425,17 @@ Status SegmentIterator::_init_inverted_index_iterators()
{
return Status::OK();
}
+// Creates the inverted index iterator for column `cid` if that slot of
+// `_inverted_index_iterators` is still empty; otherwise does nothing.
+// The whole check-and-create sequence runs while holding `_idx_init_lock`.
+Status SegmentIterator::_init_inverted_index_iterators(ColumnId cid) {
+    std::lock_guard guard(_idx_init_lock);
+    auto& iterator_slot = _inverted_index_iterators[cid];
+    if (iterator_slot != nullptr) {
+        // Already initialized by an earlier call.
+        return Status::OK();
+    }
+    const auto& column = _opts.tablet_schema->column(cid);
+    return _segment->new_inverted_index_iterator(
+            column, _segment->_tablet_schema->get_inverted_index(column), _opts,
+            &iterator_slot);
+}
+
Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include,
rowid_t upper_bound,
rowid_t* rowid) {
if (_segment->_tablet_schema->keys_type() == UNIQUE_KEYS &&
@@ -2822,6 +2866,64 @@ void
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
}
}
+// Walks every expression tree in `_common_expr_ctxs_push_down` and collects the
+// FUNCTION_CALL nodes that report can_push_down_to_index().
+//
+// Each collected node is tagged with an index unique id (ids start at 1) and is
+// appended, together with the context of the root it was found under, to:
+//   - compound_func_exprs     when the node itself or one of its ancestors is a
+//                             COMPOUND_PRED;
+//   - no_compound_func_exprs  otherwise.
+// Nodes that compare equal through VExpr::equals() receive the same id.
+void SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
+    auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t {
+        return std::hash<std::string>()(expr->expr_name());
+    };
+    auto equal = [](const vectorized::VExprSPtr& lhs, const vectorized::VExprSPtr& rhs) -> bool {
+        return lhs->equals(*rhs);
+    };
+
+    uint32_t next_id = 0;
+    std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), decltype(equal)> unique_map(
+            0, hash, equal);
+
+    // Returns the id already assigned to an equal expression, or assigns the next one.
+    auto gen_func_unique_id = [&unique_map, &next_id](const vectorized::VExprSPtr& expr) {
+        auto [it, inserted] = unique_map.try_emplace(expr, next_id + 1);
+        if (inserted) {
+            ++next_id;
+        }
+        return it->second;
+    };
+
+    for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
+        const auto& root_expr = root_expr_ctx->root();
+        if (root_expr == nullptr) {
+            continue;
+        }
+
+        // Iterative depth-first traversal; the bool records whether a
+        // COMPOUND_PRED has been seen on the path from the root.
+        std::stack<std::pair<vectorized::VExprSPtr, bool>> stack;
+        stack.emplace(root_expr, false);
+
+        while (!stack.empty()) {
+            // Copy the entry before popping it: references bound to stack.top()
+            // would dangle as soon as pop() destroys the element.
+            auto [expr, has_compound_pred] = stack.top();
+            stack.pop();
+
+            bool current_has_compound_pred =
+                    has_compound_pred || (expr->node_type() == TExprNodeType::COMPOUND_PRED);
+
+            if (expr->node_type() == TExprNodeType::FUNCTION_CALL &&
+                expr->can_push_down_to_index()) {
+                expr->set_index_unique_id(gen_func_unique_id(expr));
+                if (current_has_compound_pred) {
+                    compound_func_exprs.emplace_back(expr, root_expr_ctx);
+                } else {
+                    no_compound_func_exprs.emplace_back(expr, root_expr_ctx);
+                }
+            }
+
+            // Push children right-to-left so they are popped left-to-right.
+            // Children without children of their own are not visited.
+            const auto& children = expr->children();
+            for (auto child = children.rbegin(); child != children.rend(); ++child) {
+                if (!(*child)->children().empty()) {
+                    stack.emplace(*child, current_has_compound_pred);
+                }
+            }
+        }
+    }
+}
+
bool SegmentIterator::_no_need_read_key_data(ColumnId cid,
vectorized::MutableColumnPtr& column,
size_t nrows_read) {
if (!((_opts.tablet_schema->keys_type() == KeysType::DUP_KEYS ||
@@ -2900,5 +3002,23 @@ bool SegmentIterator::_can_opt_topn_reads() const {
return result;
}
+// Evaluates `expr` through its eval_inverted_index() hook and stores the
+// outcome in `result`.
+//
+// The first child of `expr` must be a SLOT_REF; its column is resolved through
+// `_schema` and `_opts.tablet_schema` to fill the FuncExprParams passed to the
+// expression. Returns a RuntimeError when the first child is missing or is not
+// a slot reference; otherwise returns whatever eval_inverted_index() returns.
+Status SegmentIterator::execute_func_expr(const vectorized::VExprSPtr& expr,
+                                          const vectorized::VExprContextSPtr& expr_ctx,
+                                          std::shared_ptr<roaring::Roaring>& result) {
+    const auto& first_arg = expr->get_child(0);
+    const bool first_arg_is_slot =
+            first_arg != nullptr && first_arg->node_type() == TExprNodeType::SLOT_REF;
+    if (!first_arg_is_slot) {
+        return Status::RuntimeError("cannot perform index filtering");
+    }
+
+    const auto slot_ref = std::static_pointer_cast<vectorized::VSlotRef>(first_arg);
+    const auto slot_column = slot_ref->column_id();
+
+    FuncExprParams params;
+    params._segment_iterator = this;
+    params._column_id = _schema->column_id(slot_column);
+    params._unique_id = _schema->unique_id(slot_column);
+    params._column_name = _opts.tablet_schema->column(params._column_id).name();
+
+    return expr->eval_inverted_index(expr_ctx.get(), params, result);
+}
+
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index a062216ab0b..ecae1ea6aff 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -107,6 +107,15 @@ struct ColumnPredicateInfo {
int32_t column_id;
};
+class SegmentIterator;
+// Arguments handed to VExpr::eval_inverted_index() when a function expression
+// is evaluated against the inverted index. SegmentIterator::execute_func_expr()
+// fills the column fields from the function's first argument (a slot ref).
+struct FuncExprParams {
+ // Set from _schema->column_id(...) of the slot ref's column.
+ ColumnId _column_id = 0;
+ // Set from _schema->unique_id(...) of the slot ref's column.
+ uint32_t _unique_id = 0;
+ // Name of tablet-schema column `_column_id`.
+ std::string _column_name;
+ // Non-owning pointer to the iterator that issued the evaluation.
+ SegmentIterator* _segment_iterator = nullptr;
+ // NOTE(review): not assigned by execute_func_expr(), which passes its result
+ // bitmap as a separate argument - confirm whether this member is still needed.
+ std::shared_ptr<roaring::Roaring> result;
+};
+
class SegmentIterator : public RowwiseIterator {
public:
SegmentIterator(std::shared_ptr<Segment> segment, SchemaSPtr schema);
@@ -123,6 +132,8 @@ public:
std::vector<RowLocation>* block_row_locations) override;
const Schema& schema() const override { return *_schema; }
+ Segment& segment() { return *_segment; }
+ StorageReadOptions& storage_read_options() { return _opts; }
bool is_lazy_materialization_read() const override { return
_lazy_materialization_read; }
uint64_t data_id() const override { return _segment->id(); }
RowsetId rowset_id() const { return _segment->rowset_id(); }
@@ -142,6 +153,11 @@ public:
return updated;
}
+ std::vector<std::unique_ptr<InvertedIndexIterator>>&
inverted_index_iterators() {
+ return _inverted_index_iterators;
+ }
+ [[nodiscard]] Status _init_inverted_index_iterators(ColumnId cid);
+
private:
Status _next_batch_internal(vectorized::Block* block);
@@ -308,6 +324,7 @@ private:
bool _check_column_pred_all_push_down(const std::string& column_name, bool
in_compound = false,
bool is_match = false);
void _calculate_pred_in_remaining_conjunct_root(const
vectorized::VExprSPtr& expr);
+ void _calculate_func_in_remaining_conjunct_root();
// todo(wb) remove this method after RowCursor is removed
void _convert_rowcursor_to_short_key(const RowCursor& key, size_t
num_keys) {
@@ -387,6 +404,10 @@ private:
bool _can_opt_topn_reads() const;
+ Status execute_func_expr(const vectorized::VExprSPtr& expr,
+ const vectorized::VExprContextSPtr& expr_ctx,
+ std::shared_ptr<roaring::Roaring>& result);
+
class BitmapRangeIterator;
class BackwardBitmapRangeIterator;
@@ -452,6 +473,11 @@ private:
// make a copy of `_opts.column_predicates` in order to make local changes
std::vector<ColumnPredicate*> _col_predicates;
std::vector<ColumnPredicate*> _col_preds_except_leafnode_of_andnode;
+
+ using FuncExprPair = std::pair<vectorized::VExprSPtr,
vectorized::VExprContextSPtr>;
+ std::vector<FuncExprPair> no_compound_func_exprs;
+ std::vector<FuncExprPair> compound_func_exprs;
+
vectorized::VExprContextSPtrs _common_expr_ctxs_push_down;
bool _enable_common_expr_pushdown = false;
std::vector<vectorized::VExprSPtr> _remaining_conjunct_roots;
@@ -493,6 +519,13 @@ private:
std::set<int32_t> _output_columns;
std::unique_ptr<HierarchicalDataReader> _path_reader;
+
+ std::vector<uint8_t> _ret_flags;
+
+ std::unordered_map<int, std::unordered_map<std::string, bool>>
+ _column_predicate_inverted_index_status;
+
+ std::mutex _idx_init_lock;
};
} // namespace segment_v2
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp
b/be/src/vec/exprs/vectorized_fn_call.cpp
index 77b6b9a7c9f..5b5cdc2e8a0 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -112,8 +112,7 @@ Status VectorizedFnCall::prepare(RuntimeState* state, const
RowDescriptor& desc,
}
VExpr::register_function_context(state, context);
_function_name = _fn.name.function_name;
- _can_fast_execute = _function->can_fast_execute() && _children.size() == 2
&&
- _children[0]->is_slot_ref() &&
_children[1]->is_literal();
+ _can_fast_execute = can_fast_execute();
_prepare_finished = true;
return Status::OK();
}
@@ -215,4 +214,45 @@ std::string VectorizedFnCall::debug_string(const
std::vector<VectorizedFnCall*>&
out << "]";
return out.str();
}
+
+// Forwards to the wrapped function: the call expression can be pushed down to
+// the index exactly when `_function` says it can.
+bool VectorizedFnCall::can_push_down_to_index() const {
+ return _function->can_push_down_to_index();
+}
+
+// Reports whether this call can take the fast-execute path.
+//
+// True for the comparison functions eq/ne/lt/gt/le/ge when the call has exactly
+// two children shaped as (slot ref, literal); in every other case the answer is
+// whatever the wrapped function reports for can_push_down_to_index().
+bool VectorizedFnCall::can_fast_execute() const {
+    const auto name = _function->get_name();
+    const bool is_comparison = name == "eq" || name == "ne" || name == "lt" || name == "gt" ||
+                               name == "le" || name == "ge";
+    if (is_comparison && _children.size() == 2 && _children[0]->is_slot_ref() &&
+        _children[1]->is_literal()) {
+        return true;
+    }
+    return _function->can_push_down_to_index();
+}
+
+// Delegates inverted-index evaluation to the wrapped function, handing it the
+// function context registered for this expression at `_fn_context_index`.
+Status VectorizedFnCall::eval_inverted_index(VExprContext* context,
+                                             segment_v2::FuncExprParams& params,
+                                             std::shared_ptr<roaring::Roaring>& result) {
+    FunctionContext* fn_ctx = context->fn_context(_fn_context_index);
+    return _function->eval_inverted_index(fn_ctx, params, result);
+}
+
+// Two function calls are equal when `other` is also a VectorizedFnCall, both
+// have the same function name and the same number of children, and every pair
+// of children at the same position compares equal.
+bool VectorizedFnCall::equals(const VExpr& other) {
+    const auto* rhs = dynamic_cast<const VectorizedFnCall*>(&other);
+    if (rhs == nullptr || _function_name != rhs->_function_name) {
+        return false;
+    }
+
+    const size_t child_count = children().size();
+    if (child_count != rhs->children().size()) {
+        return false;
+    }
+
+    for (size_t idx = 0; idx != child_count; ++idx) {
+        if (!get_child(idx)->equals(*rhs->get_child(idx))) {
+            return false;
+        }
+    }
+    return true;
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vectorized_fn_call.h
b/be/src/vec/exprs/vectorized_fn_call.h
index 286e9daac45..3ef7a7464f5 100644
--- a/be/src/vec/exprs/vectorized_fn_call.h
+++ b/be/src/vec/exprs/vectorized_fn_call.h
@@ -66,9 +66,14 @@ public:
}
static std::string debug_string(const std::vector<VectorizedFnCall*>&
exprs);
+ bool can_push_down_to_index() const override;
+ bool can_fast_execute() const override;
+ Status eval_inverted_index(VExprContext* context,
segment_v2::FuncExprParams& params,
+ std::shared_ptr<roaring::Roaring>& result)
override;
+ bool equals(const VExpr& other) override;
+
protected:
FunctionBasePtr _function;
- bool _can_fast_execute = false;
std::string _expr_name;
std::string _function_name;
@@ -76,4 +81,5 @@ private:
Status _do_execute(doris::vectorized::VExprContext* context,
doris::vectorized::Block* block,
int* result_column_id, std::vector<size_t>& args);
};
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index bb6e48f6084..e6a2cf117a2 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -624,24 +624,33 @@ bool VExpr::fast_execute(Block& block, const
ColumnNumbers& arguments, size_t re
}
std::string VExpr::gen_predicate_result_sign(Block& block, const
ColumnNumbers& arguments,
- const std::string& function_name)
{
+ const std::string& function_name)
const {
std::string pred_result_sign;
- std::string column_name = block.get_by_position(arguments[0]).name;
- pred_result_sign +=
- BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" +
function_name + "_";
- if (function_name == "in" || function_name == "not_in") {
- // Generating 'result_sign' from 'inlist' requires sorting the values.
- std::set<std::string> values;
- for (size_t i = 1; i < arguments.size(); i++) {
- const auto& entry = block.get_by_position(arguments[i]);
- values.insert(entry.type->to_string(*entry.column, 0));
- }
- pred_result_sign += boost::join(values, ",");
+ if (this->node_type() == TExprNodeType::FUNCTION_CALL) {
+ pred_result_sign =
+ BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(this->index_unique_id());
} else {
- const auto& entry = block.get_by_position(arguments[1]);
- pred_result_sign += entry.type->to_string(*entry.column, 0);
+ std::string column_name = block.get_by_position(arguments[0]).name;
+ pred_result_sign +=
+ BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" +
function_name + "_";
+ if (function_name == "in" || function_name == "not_in") {
+ // Generating 'result_sign' from 'inlist' requires sorting the
values.
+ std::set<std::string> values;
+ for (size_t i = 1; i < arguments.size(); i++) {
+ const auto& entry = block.get_by_position(arguments[i]);
+ values.insert(entry.type->to_string(*entry.column, 0));
+ }
+ pred_result_sign += boost::join(values, ",");
+ } else {
+ const auto& entry = block.get_by_position(arguments[1]);
+ pred_result_sign += entry.type->to_string(*entry.column, 0);
+ }
}
return pred_result_sign;
}
+// Base-class default: reports every pair of expressions as unequal.
+// Expression types that support structural comparison override this.
+bool VExpr::equals(const VExpr& other) {
+ return false;
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index c2f4d2ef6fd..708b6c8ea90 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -226,7 +226,17 @@ public:
size_t input_rows_count, const std::string&
function_name);
std::string gen_predicate_result_sign(Block& block, const ColumnNumbers&
arguments,
- const std::string& function_name);
+ const std::string& function_name)
const;
+
+ virtual bool can_push_down_to_index() const { return false; }
+ virtual bool can_fast_execute() const { return false; }
+ virtual Status eval_inverted_index(VExprContext* context,
segment_v2::FuncExprParams& params,
+ std::shared_ptr<roaring::Roaring>&
result) {
+ return Status::NotSupported("Not supported
execute_with_inverted_index");
+ }
+ virtual bool equals(const VExpr& other);
+ void set_index_unique_id(uint32_t index_unique_id) { _index_unique_id =
index_unique_id; }
+ uint32_t index_unique_id() const { return _index_unique_id; }
protected:
/// Simple debug string that provides no expr subclass-specific information
@@ -292,6 +302,10 @@ protected:
// for concrete classes
bool _prepare_finished = false;
bool _open_finished = false;
+
+ // ensuring uniqueness during index traversal
+ uint32_t _index_unique_id = 0;
+ bool _can_fast_execute = false;
};
} // namespace vectorized
diff --git a/be/src/vec/exprs/vin_predicate.cpp
b/be/src/vec/exprs/vin_predicate.cpp
index 8eaf42abf23..4affec791a4 100644
--- a/be/src/vec/exprs/vin_predicate.cpp
+++ b/be/src/vec/exprs/vin_predicate.cpp
@@ -78,9 +78,7 @@ Status VInPredicate::prepare(RuntimeState* state, const
RowDescriptor& desc,
VExpr::register_function_context(state, context);
_prepare_finished = true;
-
- _can_fast_execute = _function->can_fast_execute();
-
+ _can_fast_execute = can_fast_execute();
return Status::OK();
}
diff --git a/be/src/vec/exprs/vin_predicate.h b/be/src/vec/exprs/vin_predicate.h
index 82e99ac447a..5f197f996e9 100644
--- a/be/src/vec/exprs/vin_predicate.h
+++ b/be/src/vec/exprs/vin_predicate.h
@@ -54,6 +54,7 @@ public:
const FunctionBasePtr function() { return _function; }
bool is_not_in() const { return _is_not_in; };
+ bool can_fast_execute() const override { return true; }
private:
FunctionBasePtr _function;
@@ -61,7 +62,5 @@ private:
const bool _is_not_in;
static const constexpr char* function_name = "in";
-
- bool _can_fast_execute = false;
};
} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp
index c7fbb081675..1d02a63321b 100644
--- a/be/src/vec/exprs/vliteral.cpp
+++ b/be/src/vec/exprs/vliteral.cpp
@@ -96,4 +96,25 @@ std::string VLiteral::debug_string() const {
return out.str();
}
+// Returns true only when `other` is a VLiteral with the same expression name
+// whose column has the same structure, the same size and the same value in
+// every row as this literal's column.
+bool VLiteral::equals(const VExpr& other) {
+    const auto* other_ptr = dynamic_cast<const VLiteral*>(&other);
+    if (!other_ptr) {
+        return false;
+    }
+    if (this->_expr_name != other_ptr->_expr_name) {
+        return false;
+    }
+    // Columns of different structure cannot represent the same literal.
+    // Previously this case fell through to `return true`.
+    if (!this->_column_ptr->structure_equals(*other_ptr->_column_ptr)) {
+        return false;
+    }
+    if (this->_column_ptr->size() != other_ptr->_column_ptr->size()) {
+        return false;
+    }
+    for (size_t i = 0; i < this->_column_ptr->size(); i++) {
+        if (this->_column_ptr->compare_at(i, i, *other_ptr->_column_ptr, -1) != 0) {
+            return false;
+        }
+    }
+    return true;
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h
index c79d795e987..b7a48145a8d 100644
--- a/be/src/vec/exprs/vliteral.h
+++ b/be/src/vec/exprs/vliteral.h
@@ -57,6 +57,8 @@ public:
bool is_literal() const override { return true; }
+ bool equals(const VExpr& other) override;
+
protected:
ColumnPtr _column_ptr;
std::string _expr_name;
diff --git a/be/src/vec/exprs/vslot_ref.cpp b/be/src/vec/exprs/vslot_ref.cpp
index de9a10137f2..df761360d08 100644
--- a/be/src/vec/exprs/vslot_ref.cpp
+++ b/be/src/vec/exprs/vslot_ref.cpp
@@ -112,4 +112,21 @@ std::string VSlotRef::debug_string() const {
out << "SlotRef(slot_id=" << _slot_id << VExpr::debug_string() << ")";
return out.str();
}
+
+// Two slot references are equal when `other` is also a VSlotRef and the slot
+// id, column id, column name and column label all match.
+bool VSlotRef::equals(const VExpr& other) {
+    // Deliberately does not delegate to VExpr::equals(): the base implementation
+    // returns false unconditionally, which made this override always return
+    // false and left the field comparison below unreachable.
+    const auto* other_ptr = dynamic_cast<const VSlotRef*>(&other);
+    if (!other_ptr) {
+        return false;
+    }
+    return this->_slot_id == other_ptr->_slot_id && this->_column_id == other_ptr->_column_id &&
+           this->_column_name == other_ptr->_column_name &&
+           this->_column_label == other_ptr->_column_label;
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vslot_ref.h b/be/src/vec/exprs/vslot_ref.h
index f96728eda5c..2bea6ea5c06 100644
--- a/be/src/vec/exprs/vslot_ref.h
+++ b/be/src/vec/exprs/vslot_ref.h
@@ -52,6 +52,8 @@ public:
int slot_id() const { return _slot_id; }
+ bool equals(const VExpr& other) override;
+
private:
int _slot_id;
int _column_id;
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
index a3f004a09f5..d880836248f 100644
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@@ -31,6 +31,7 @@
#include "common/exception.h"
#include "common/status.h"
+#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "udf/udf.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
@@ -40,6 +41,10 @@
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_nullable.h"
+namespace doris::segment_v2 {
+struct FuncExprParams;
+} // namespace doris::segment_v2
+
namespace doris::vectorized {
#define RETURN_REAL_TYPE_FOR_DATEV2_FUNCTION(TYPE)
\
@@ -54,6 +59,7 @@ namespace doris::vectorized {
: std::make_shared<TYPE>();
class Field;
+class VExpr;
// Only use dispose the variadic argument
template <typename T>
@@ -185,8 +191,6 @@ public:
return Status::OK();
}
- virtual bool can_fast_execute() const { return false; }
-
virtual bool is_use_default_implementation_for_constants() const = 0;
/// The property of monotonicity for a certain range.
@@ -214,6 +218,13 @@ public:
get_name());
return Monotonicity {};
}
+
+ virtual bool can_push_down_to_index() const { return false; }
+ virtual Status eval_inverted_index(FunctionContext* context,
segment_v2::FuncExprParams& params,
+ std::shared_ptr<roaring::Roaring>&
result) {
+ return Status::NotSupported("eval_inverted_index is not supported in
function: ",
+ get_name());
+ }
};
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
@@ -485,13 +496,6 @@ public:
return function->close(context, scope);
}
- bool can_fast_execute() const override {
- auto function_name = function->get_name();
- return function_name == "eq" || function_name == "ne" || function_name
== "lt" ||
- function_name == "gt" || function_name == "le" || function_name
== "ge" ||
- function_name == "in" || function_name == "not_in";
- }
-
IFunctionBase::Monotonicity get_monotonicity_for_range(const IDataType&
type, const Field& left,
const Field& right)
const override {
return function->get_monotonicity_for_range(type, left, right);
@@ -501,6 +505,12 @@ public:
return function->is_use_default_implementation_for_constants();
}
+ bool can_push_down_to_index() const override { return
function->can_push_down_to_index(); }
+ Status eval_inverted_index(FunctionContext* context,
segment_v2::FuncExprParams& params,
+ std::shared_ptr<roaring::Roaring>& result)
override {
+ return function->eval_inverted_index(context, params, result);
+ }
+
private:
std::shared_ptr<IFunction> function;
DataTypes arguments;
diff --git a/be/src/vec/functions/function_multi_match.cpp
b/be/src/vec/functions/function_multi_match.cpp
new file mode 100644
index 00000000000..d34526d0f29
--- /dev/null
+++ b/be/src/vec/functions/function_multi_match.cpp
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_multi_match.h"
+
+#include <gen_cpp/PaloBrokerService_types.h>
+#include <glog/logging.h>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/classification.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <roaring/roaring.hh>
+#include <string>
+#include <vector>
+
+#include "io/fs/file_reader.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h"
+#include "olap/rowset/segment_v2/segment_iterator.h"
+#include "runtime/primitive_type.h"
+#include "vec/columns/column.h"
+#include "vec/data_types/data_type.h"
+#include "vec/exprs/varray_literal.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vslot_ref.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+Status FunctionMultiMatch::execute_impl(FunctionContext* /*context*/, Block&
block,
+ const ColumnNumbers& arguments, size_t
result,
+ size_t /*input_rows_count*/) const {
+ return Status::RuntimeError("only inverted index queries are supported"); // Row-by-row evaluation is not implemented; multi_match is answered only through eval_inverted_index().
+}
+
+/// Captures the constant arguments of multi_match into a MatchParam stored as function state.
+///
+/// Argument 1 is a comma separated column list (all whitespace is stripped, empty names are
+/// dropped), argument 2 is the match type and argument 3 is the query text. Arguments that
+/// are not constant are left at their default (empty) value. THREAD_LOCAL scope is a no-op.
+///
+/// @param context function context supplying argument types and constant columns
+/// @param scope   state scope being opened; the MatchParam is registered under this scope
+/// @return always Status::OK()
+Status FunctionMultiMatch::open(FunctionContext* context,
+                                FunctionContext::FunctionStateScope scope) {
+    if (scope == FunctionContext::THREAD_LOCAL) {
+        return Status::OK();
+    }
+
+    DCHECK(context->get_num_args() == 4);
+    for (int i = 0; i < context->get_num_args(); ++i) {
+        DCHECK(is_string_type(context->get_arg_type(i)->type));
+    }
+
+    auto state = std::make_shared<MatchParam>();
+    context->set_function_state(scope, state);
+
+    // Splits "a, b ,c" into {"a", "b", "c"} and records every non-empty name.
+    const auto collect_fields = [&state](std::string raw_names) {
+        const auto first_space =
+                std::remove_if(raw_names.begin(), raw_names.end(),
+                               [](unsigned char c) { return std::isspace(c); });
+        raw_names.erase(first_space, raw_names.end());
+
+        std::vector<std::string> names;
+        boost::split(names, raw_names, boost::algorithm::is_any_of(","));
+        for (const auto& name : names) {
+            if (name.empty()) {
+                continue;
+            }
+            state->fields.insert(name);
+        }
+    };
+
+    for (int arg_idx = 0; arg_idx < context->get_num_args(); ++arg_idx) {
+        const auto& constant = context->get_constant_col(arg_idx);
+        if (!constant) {
+            continue; // non-constant argument: nothing to capture
+        }
+        auto value = constant->column_ptr->get_data_at(0);
+        if (arg_idx == 1) {
+            collect_fields(value.to_string());
+        } else if (arg_idx == 2) {
+            state->type = value.to_string();
+        } else if (arg_idx == 3) {
+            state->query = value.to_string();
+        }
+    }
+
+    return Status::OK();
+}
+
+/// Evaluates multi_match for one segment through the inverted index.
+///
+/// The searched columns are the field list captured by open() plus the column named by
+/// params._column_name. Each of them must exist in the tablet schema, be a string column
+/// and have an inverted index. The per-column hits are OR-ed into `result`, and the merged
+/// bitmap is looked up in / inserted into InvertedIndexQueryCache.
+///
+/// @param context function context holding the FRAGMENT_LOCAL MatchParam set by open()
+/// @param params  supplies the segment iterator and the additional column name
+/// @param result  receives the matching rows; replaced by the cached bitmap on a cache hit
+/// @return OK on success, RuntimeError for invalid parameters, columns or index state,
+///         or whatever error the index iterator / reader reports
+Status FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
+                                               segment_v2::FuncExprParams& params,
+                                               std::shared_ptr<roaring::Roaring>& result) {
+    auto* match_param = reinterpret_cast<MatchParam*>(
+            context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+    if (match_param == nullptr) {
+        return Status::RuntimeError("function parameter parsing failed");
+    }
+
+    // Work on a private copy of the field set. The MatchParam lives in FRAGMENT_LOCAL state,
+    // which is presumably reachable from more than one evaluation at a time, so inserting
+    // into match_param->fields here would mutate shared state without synchronization.
+    std::set<std::string> fields = match_param->fields;
+    fields.insert(params._column_name);
+
+    // The merge below dereferences `result`; make sure there is a bitmap to merge into.
+    if (result == nullptr) {
+        result = std::make_shared<roaring::Roaring>();
+    }
+
+    const auto& segment_iterator = params._segment_iterator;
+    const auto& opts = segment_iterator->storage_read_options();
+    const auto& tablet_schema = opts.tablet_schema;
+
+    // Validate every requested column before touching the index.
+    std::vector<ColumnId> columns_ids;
+    columns_ids.reserve(fields.size());
+    for (const auto& column_name : fields) {
+        auto cid = tablet_schema->field_index(column_name);
+        if (cid < 0) {
+            return Status::RuntimeError("column name is incorrect");
+        }
+        const auto& column = tablet_schema->column(cid);
+        if (!is_string_type(column.type())) {
+            return Status::RuntimeError("column type is incorrect");
+        }
+        if (!tablet_schema->has_inverted_index(column)) {
+            return Status::RuntimeError("column index is incorrect");
+        }
+        columns_ids.emplace_back(cid);
+    }
+
+    // query type: only "phrase_prefix" is supported
+    InvertedIndexQueryType query_type;
+    if (match_param->type == "phrase_prefix") {
+        query_type = InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
+    } else {
+        return Status::RuntimeError("query type is incorrect");
+    }
+
+    // cache key: the set of column ids is serialized as a roaring bitmap and used in place of
+    // a single column name, together with the segment path, the query type and the query text
+    roaring::Roaring cids_str;
+    cids_str.addMany(columns_ids.size(), columns_ids.data());
+    cids_str.runOptimize();
+    std::string column_name_binary(cids_str.getSizeInBytes(), 0);
+    cids_str.write(column_name_binary.data());
+
+    InvertedIndexQueryCache::CacheKey cache_key;
+    io::Path index_path = segment_iterator->segment().file_reader()->path();
+    cache_key.index_path = index_path.parent_path() / index_path.stem();
+    cache_key.column_name = column_name_binary;
+    cache_key.query_type = query_type;
+    cache_key.value = match_param->query;
+
+    // query cache
+    auto* cache = InvertedIndexQueryCache::instance();
+    InvertedIndexQueryCacheHandle cache_handler;
+    if (cache->lookup(cache_key, &cache_handler)) {
+        result = cache_handler.get_bitmap();
+        return Status::OK();
+    }
+
+    // Reject unknown storage formats up front: silently skipping the query would produce an
+    // empty bitmap that is then cached as if it were a real answer.
+    const auto index_version = tablet_schema->get_inverted_index_storage_format();
+    if (index_version != InvertedIndexStorageFormatPB::V1 &&
+        index_version != InvertedIndexStorageFormatPB::V2) {
+        return Status::RuntimeError("inverted index storage format is incorrect");
+    }
+
+    // Pass the explicit length so the query text is not cut at an embedded '\0'.
+    StringRef query_value(match_param->query.data(), match_param->query.size());
+
+    // search: query each column's index and OR the hits together
+    for (const auto& column_name : fields) {
+        auto cid = tablet_schema->field_index(column_name);
+        const auto& column = tablet_schema->column(column_name);
+
+        auto& index_iterator = segment_iterator->inverted_index_iterators()[cid];
+        if (!index_iterator) {
+            RETURN_IF_ERROR(segment_iterator->_init_inverted_index_iterators(cid));
+        }
+        // Initialization may still leave the slot empty; fail instead of dereferencing null.
+        if (!index_iterator) {
+            return Status::RuntimeError("column index is incorrect");
+        }
+        const auto& index_reader = index_iterator->reader();
+
+        // V1 addresses the index by column name, V2 by the column's unique id.
+        const std::string index_column_name = index_version == InvertedIndexStorageFormatPB::V1
+                                                      ? column_name
+                                                      : std::to_string(column.unique_id());
+
+        auto single_result = std::make_shared<roaring::Roaring>();
+        RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, index_column_name,
+                                            &query_value, query_type, single_result));
+        (*result) |= (*single_result);
+    }
+
+    result->runOptimize();
+    cache->insert(cache_key, result, &cache_handler);
+
+    return Status::OK();
+}
+
+void register_function_multi_match(SimpleFunctionFactory& factory) {
+ factory.register_function<FunctionMultiMatch>(); // Adds FunctionMultiMatch to the given factory; called from SimpleFunctionFactory's registration list.
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_multi_match.h
b/be/src/vec/functions/function_multi_match.h
new file mode 100644
index 00000000000..b7d2bd3c30e
--- /dev/null
+++ b/be/src/vec/functions/function_multi_match.h
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <boost/algorithm/string/split.hpp>
+
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+class MatchParam { // Constant arguments of multi_match as captured by FunctionMultiMatch::open().
+public:
+ std::string query; // Query text: constant argument 3.
+ std::set<std::string> fields; // Column names to search: constant argument 1, split on ',' with all whitespace removed and empty names dropped.
+ std::string type; // Match type: constant argument 2; eval_inverted_index() accepts only "phrase_prefix".
+};
+
+class FunctionMultiMatch : public IFunction { // multi_match(column, 'field1, field2', 'match_type', 'query'): runs one query against several inverted-indexed string columns; index-only.
+public:
+ static constexpr auto name = "multi_match";
+
+ static FunctionPtr create() { return
std::make_shared<FunctionMultiMatch>(); }
+ using NullMapType = PaddedPODArray<UInt8>; // Not referenced anywhere inside this class.
+
+ String get_name() const override { return name; }
+
+ bool is_variadic() const override { return false; }
+
+ size_t get_number_of_arguments() const override { return 4; } // Argument 0 is the column; open() reads 1 = field list, 2 = match type, 3 = query text.
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return std::make_shared<DataTypeUInt8>(); // Always UInt8, regardless of the argument types.
+ }
+
+ Status open(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override;
+
+ Status close(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override {
+ return Status::OK(); // No cleanup is performed here.
+ }
+
+ Status execute_impl(FunctionContext* /*context*/, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t /*input_rows_count*/) const
override;
+
+ bool can_push_down_to_index() const override { return true; } // Evaluation goes through eval_inverted_index(); execute_impl() only returns an error.
+
+ Status eval_inverted_index(FunctionContext* context,
segment_v2::FuncExprParams& params,
+ std::shared_ptr<roaring::Roaring>& result)
override;
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/simple_function_factory.h
b/be/src/vec/functions/simple_function_factory.h
index 68e2f85e01c..e72e4f494d9 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -95,15 +95,14 @@ void
register_function_multi_string_position(SimpleFunctionFactory& factory);
void register_function_multi_string_search(SimpleFunctionFactory& factory);
void register_function_width_bucket(SimpleFunctionFactory& factory);
void register_function_ignore(SimpleFunctionFactory& factory);
-
void register_function_encryption(SimpleFunctionFactory& factory);
void register_function_regexp_extract(SimpleFunctionFactory& factory);
void register_function_hex_variadic(SimpleFunctionFactory& factory);
void register_function_match(SimpleFunctionFactory& factory);
void register_function_tokenize(SimpleFunctionFactory& factory);
-
void register_function_url(SimpleFunctionFactory& factory);
void register_function_ip(SimpleFunctionFactory& factory);
+void register_function_multi_match(SimpleFunctionFactory& factory);
class SimpleFunctionFactory {
using Creator = std::function<FunctionBuilderPtr()>;
@@ -286,6 +285,7 @@ public:
register_function_tokenize(instance);
register_function_ignore(instance);
register_function_variant_element(instance);
+ register_function_multi_match(instance);
});
return instance;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 0d353eb3b0d..b418c66e3ef 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -298,6 +298,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.MonthName;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsSub;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.MultiMatch;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiMatchAny;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiSearchAllPositions;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332;
@@ -906,8 +907,8 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(YearWeek.class, "yearweek"),
scalar(YearsAdd.class, "years_add"),
scalar(YearsDiff.class, "years_diff"),
- scalar(YearsSub.class, "years_sub")
- );
+ scalar(YearsSub.class, "years_sub"),
+ scalar(MultiMatch.class, "multi_match"));
public static final BuiltinScalarFunctions INSTANCE = new
BuiltinScalarFunctions();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
new file mode 100644
index 00000000000..3df662808ed
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.StringType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'multi_match'. This class is generated by GenerateFunction.
+ *
+ * <p>Single signature: four STRING arguments returning BOOLEAN; the result is never nullable
+ * (AlwaysNotNullable).
+ *
+ * <p>NOTE(review): the class implements BinaryExpression although it always has four
+ * children - confirm that this marker interface is intended for a 4-argument function.
+ */
+public class MultiMatch extends ScalarFunction
+ implements BinaryExpression, ExplicitlyCastableSignature,
AlwaysNotNullable {
+
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+ FunctionSignature.ret(BooleanType.INSTANCE)
+ .args(StringType.INSTANCE,
+ StringType.INSTANCE,
+ StringType.INSTANCE,
+ StringType.INSTANCE)
+ );
+
+ /**
+ * constructor with 4 arguments.
+ * The arguments are passed to ScalarFunction in order under the name "multi_match".
+ */
+ public MultiMatch(Expression arg0, Expression arg1, Expression arg2,
Expression arg3) {
+ super("multi_match", arg0, arg1, arg2, arg3);
+ }
+
+ /**
+ * withChildren.
+ * Builds a new MultiMatch from exactly four children; any other count fails the
+ * Preconditions check.
+ */
+ @Override
+ public MultiMatch withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 4);
+ return new MultiMatch(children.get(0), children.get(1),
children.get(2), children.get(3));
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitMultiMatch(this, context);
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index e6c6a6d0439..3f0e36ed5eb 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -299,6 +299,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.MonthName;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MonthsSub;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.MultiMatch;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiMatchAny;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiSearchAllPositions;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332;
@@ -2157,4 +2158,8 @@ public interface ScalarFunctionVisitor<R, C> {
default R visitStructElement(StructElement structElement, C context) {
return visitScalarFunction(structElement, context);
}
+
+ default R visitMultiMatch(MultiMatch multiMatch, C context) {
+ return visitScalarFunction(multiMatch, context); // No special handling: falls back to the generic scalar-function visit.
+ }
}
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 0368be097c9..5a0bc63f3c9 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2206,6 +2206,11 @@ visible_functions = {
[['ignore'], 'BOOLEAN', ['ARRAY_DECIMAL128', '...'],
'ALWAYS_NOT_NULLABLE'],
[['ignore'], 'BOOLEAN', ['ARRAY_VARCHAR', '...'],
'ALWAYS_NOT_NULLABLE'],
[['ignore'], 'BOOLEAN', ['ARRAY_STRING', '...'], 'ALWAYS_NOT_NULLABLE']
+ ],
+
+ # multi match functions
+ "MultiMatch": [
+ [['multi_match'], 'BOOLEAN', ['STRING', 'STRING', 'STRING', 'STRING'],
'ALWAYS_NOT_NULLABLE']
]
}
diff --git a/regression-test/data/inverted_index_p0/test_index_multi_match.out
b/regression-test/data/inverted_index_p0/test_index_multi_match.out
new file mode 100644
index 00000000000..77e3c86623e
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_multi_match.out
@@ -0,0 +1,49 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+178
+
+-- !sql --
+180
+
+-- !sql --
+859
+
+-- !sql --
+44
+
+-- !sql --
+178
+
+-- !sql --
+180
+
+-- !sql --
+859
+
+-- !sql --
+44
+
+-- !sql --
+178
+
+-- !sql --
+180
+
+-- !sql --
+859
+
+-- !sql --
+44
+
+-- !sql --
+178
+
+-- !sql --
+180
+
+-- !sql --
+859
+
+-- !sql --
+44
+
diff --git
a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
new file mode 100644
index 00000000000..90f9f7a751b
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_multi_match", "p0"){
+ def indexTbName1 = "test_index_multi_match_1"
+ def indexTbName2 = "test_index_multi_match_2"
+ def indexTbName3 = "test_index_multi_match_3"
+ def indexTbName4 = "test_index_multi_match_4"
+
+ sql "DROP TABLE IF EXISTS ${indexTbName1}"
+ sql "DROP TABLE IF EXISTS ${indexTbName2}"
+ sql "DROP TABLE IF EXISTS ${indexTbName3}"
+ sql "DROP TABLE IF EXISTS ${indexTbName4}"
+
+ def create_table = {table_name, idx_version ->
+ sql """
+ CREATE TABLE ${table_name} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` text NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` text NULL COMMENT "",
+ `size` text NULL COMMENT "",
+ INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "inverted_index_storage_format" = "${idx_version}",
+ "disable_auto_compaction" = "true"
+ );
+ """
+ }
+
+ def load_httplogs_data = {table_name, label, read_flag, format_flag,
file_name, ignore_failure=false,
+ expected_succ_rows = -1, load_to_single_tablet =
'true' ->
+
+ // load the json data
+ streamLoad {
+ table "${table_name}"
+
+ // set http request header params
+ set 'label', label + "_" + UUID.randomUUID().toString()
+ set 'read_json_by_line', read_flag
+ set 'format', format_flag
+ file file_name // import json file
+ time 10000 // limit inflight 10s
+ if (expected_succ_rows >= 0) {
+ set 'max_filter_ratio', '1'
+ }
+
+ // If a check callback is declared, the default check conditions are skipped,
+ // so the callback must verify every condition itself.
+ check { result, exception, startTime, endTime ->
+ if (ignore_failure && expected_succ_rows < 0) { return }
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ if (expected_succ_rows >= 0) {
+ assertEquals(json.NumberLoadedRows, expected_succ_rows)
+ } else {
+ assertEquals(json.NumberTotalRows,
json.NumberLoadedRows + json.NumberUnselectedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes
> 0)
+ }
+ }
+ }
+ }
+
+ try {
+ create_table(indexTbName1, 'V1')
+ create_table(indexTbName2, 'V2')
+ create_table(indexTbName3, 'V1')
+ create_table(indexTbName4, 'V2')
+
+ load_httplogs_data.call(indexTbName1, 'test_index_multi_match_1',
'true', 'json', 'documents-1000.json')
+ load_httplogs_data.call(indexTbName2, 'test_index_multi_match_2',
'true', 'json', 'documents-1000.json')
+ load_httplogs_data.call(indexTbName3, 'test_index_multi_match_3',
'true', 'json', 'documents-1000.json')
+ load_httplogs_data.call(indexTbName4, 'test_index_multi_match_4',
'true', 'json', 'documents-1000.json')
+
+ sql "sync"
+
+ sql """ set enable_common_expr_pushdown = true """
+
+ qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2' or status
match_phrase_prefix '2' or size match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status
match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """
+
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2' or status
match_phrase_prefix '2' or size match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status
match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """
+
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+
+ } finally {
+ //try_sql("DROP TABLE IF EXISTS ${testTable}")
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]