github-actions[bot] commented on code in PR #41385:
URL: https://github.com/apache/doris/pull/41385#discussion_r1777967976
##########
be/src/vec/exprs/vexpr.cpp:
##########
@@ -602,80 +602,134 @@ Status VExpr::get_result_from_const(vectorized::Block*
block, const std::string&
return Status::OK();
}
-bool VExpr::fast_execute(Block& block, const ColumnNumbers& arguments, size_t
result,
- size_t input_rows_count, const std::string&
function_name) {
- if (!_enable_inverted_index_query) {
- return false;
+Status VExpr::_evaluate_inverted_index(VExprContext* context, const
FunctionBasePtr& function,
Review Comment:
warning: function '_evaluate_inverted_index' exceeds recommended
size/complexity thresholds [readability-function-size]
```cpp
Status VExpr::_evaluate_inverted_index(VExprContext* context, const
FunctionBasePtr& function,
^
```
<details>
<summary>Additional context</summary>
**be/src/vec/exprs/vexpr.cpp:604:** 106 lines including whitespace and
comments (threshold 80)
```cpp
Status VExpr::_evaluate_inverted_index(VExprContext* context, const
FunctionBasePtr& function,
^
```
</details>
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,7 +54,107 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
+ Status evaluate_inverted_index(VExprContext* context, uint32_t
segment_num_rows) override {
Review Comment:
warning: function 'evaluate_inverted_index' exceeds recommended
size/complexity thresholds [readability-function-size]
```cpp
Status evaluate_inverted_index(VExprContext* context, uint32_t
segment_num_rows) override {
^
```
<details>
<summary>Additional context</summary>
**be/src/vec/exprs/vcompound_pred.h:56:** 95 lines including whitespace and
comments (threshold 80)
```cpp
Status evaluate_inverted_index(VExprContext* context, uint32_t
segment_num_rows) override {
^
```
</details>
##########
be/src/vec/functions/match.cpp:
##########
@@ -24,85 +24,133 @@
#include "util/debug_points.h"
namespace doris::vectorized {
+Status FunctionMatchBase::evaluate_inverted_index(
+ const ColumnsWithTypeAndName& arguments,
+ const std::vector<vectorized::IndexFieldNameAndTypePair>&
data_type_with_names,
+ std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t
num_rows,
+ segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
+ DCHECK(arguments.size() == 1);
+ DCHECK(data_type_with_names.size() == 1);
+ DCHECK(iterators.size() == 1);
+ auto* iter = iterators[0];
+ auto data_type_with_name = data_type_with_names[0];
+ if (iter == nullptr) {
+ return Status::OK();
+ }
+ const std::string& function_name = get_name();
+
+ if (function_name == MATCH_PHRASE_FUNCTION || function_name ==
MATCH_PHRASE_PREFIX_FUNCTION ||
+ function_name == MATCH_PHRASE_EDGE_FUNCTION) {
+ if (iter->get_inverted_index_reader_type() ==
InvertedIndexReaderType::FULLTEXT &&
+
get_parser_phrase_support_string_from_properties(iter->get_index_properties())
==
+ INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
+ return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+ "phrase queries require setting support_phrase = true");
+ }
+ }
+ std::shared_ptr<roaring::Roaring> roaring =
std::make_shared<roaring::Roaring>();
+ Field param_value;
+ arguments[0].column->get(0, param_value);
+ auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
+ if (!is_string_type(param_type)) {
+ return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+ "arguments for match must be string");
+ }
+ std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
+
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type,
&param_value,
+
query_param));
+ if (is_string_type(param_type)) {
+ auto inverted_index_query_type = get_query_type_from_fn_name();
+ RETURN_IF_ERROR(
+ iter->read_from_inverted_index(data_type_with_name.first,
query_param->get_value(),
+ inverted_index_query_type,
num_rows, roaring));
+ } else {
+ return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+ "invalid params type for
FunctionMatchBase::evaluate_inverted_index {}",
+ param_type);
+ }
+ std::shared_ptr<roaring::Roaring> null_bitmap =
std::make_shared<roaring::Roaring>();
+ if (iter->has_null()) {
+ segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
+ RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
+ null_bitmap = null_bitmap_cache_handle.get_bitmap();
+ }
+ segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
+ bitmap_result = result;
+ bitmap_result.mask_out_null();
+ return Status::OK();
+}
Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t
result,
size_t input_rows_count) const {
ColumnPtr& column_ptr = block.get_by_position(arguments[1]).column;
DataTypePtr& type_ptr = block.get_by_position(arguments[1]).type;
auto match_query_str = type_ptr->to_string(*column_ptr, 0);
std::string column_name = block.get_by_position(arguments[0]).name;
- auto match_pred_column_name =
- BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" +
match_query_str;
- if (!block.has(match_pred_column_name)) {
- VLOG_DEBUG << "begin to execute match directly, column_name=" <<
column_name
- << ", match_query_str=" << match_query_str;
- InvertedIndexCtx* inverted_index_ctx =
reinterpret_cast<InvertedIndexCtx*>(
- context->get_function_state(FunctionContext::THREAD_LOCAL));
- if (inverted_index_ctx == nullptr) {
- inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
-
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
- }
+ VLOG_DEBUG << "begin to execute match directly, column_name=" <<
column_name
+ << ", match_query_str=" << match_query_str;
+ InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
Review Comment:
warning: use auto when initializing with a cast to avoid duplicating the
type name [modernize-use-auto]
```suggestion
auto* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
```
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,7 +54,107 @@
const std::string& expr_name() const override { return _expr_name; }
+ Status evaluate_inverted_index(VExprContext* context, uint32_t
segment_num_rows) override {
+ segment_v2::InvertedIndexResultBitmap res;
+ bool all_pass = true;
+
+ switch (_op) {
+ case TExprOpcode::COMPOUND_OR: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
+ !st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
+ all_pass = false;
+ continue;
+ }
+ if
(context->get_inverted_index_context()->has_inverted_index_result_for_expr(
+ child.get())) {
+ const auto* index_result =
+ context->get_inverted_index_context()
+
->get_inverted_index_result_for_expr(child.get());
+ if (res.is_empty()) {
+ res = *index_result;
+ } else {
+ res |= *index_result;
+ }
+ if (res.get_data_bitmap()->cardinality() ==
segment_num_rows) {
+ break; // Early exit if result is full
+ }
+ } else {
+ all_pass = false;
+ }
+ }
+ break;
+ }
+ case TExprOpcode::COMPOUND_AND: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
+ !st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
+ all_pass = false;
+ continue;
+ }
+ if
(context->get_inverted_index_context()->has_inverted_index_result_for_expr(
+ child.get())) {
+ const auto* index_result =
+ context->get_inverted_index_context()
+
->get_inverted_index_result_for_expr(child.get());
+ if (res.is_empty()) {
+ res = *index_result;
+ } else {
+ res &= *index_result;
+ }
+
+ if (res.get_data_bitmap()->isEmpty()) {
+ break; // Early exit if result is empty
+ }
+ } else {
+ all_pass = false;
+ }
+ }
+ break;
+ }
+ case TExprOpcode::COMPOUND_NOT: {
+ const auto& child = _children[0];
+ Status st = child->evaluate_inverted_index(context,
segment_num_rows);
+ if (!st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
+ return st;
+ }
+
+ if
(context->get_inverted_index_context()->has_inverted_index_result_for_expr(
+ child.get())) {
+ const auto* index_result =
+
context->get_inverted_index_context()->get_inverted_index_result_for_expr(
+ child.get());
+ roaring::Roaring full_result;
+ full_result.addRange(0, segment_num_rows);
+ res = index_result->op_not(&full_result);
+ } else {
+ all_pass = false;
+ }
+ break;
+ }
+ default:
+ return Status::NotSupported(
+ "Compound operator must be AND, OR, or NOT to execute with
inverted index.");
+ }
+
+ if (all_pass && !res.is_empty()) {
+ // set fast_execute when expr evaluated by inverted index correctly
+ _can_fast_execute = true;
+
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
res);
+ }
+ return Status::OK();
+ }
+
Status execute(VExprContext* context, Block* block, int* result_column_id)
override {
Review Comment:
warning: function 'execute' has cognitive complexity of 112 (threshold 50)
[readability-function-cognitive-complexity]
```cpp
Status execute(VExprContext* context, Block* block, int*
result_column_id) override {
^
```
<details>
<summary>Additional context</summary>
**be/src/vec/exprs/vcompound_pred.h:154:** +1, including nesting penalty of
0, nesting level increased to 1
```cpp
if (_can_fast_execute && fast_execute(context, block,
result_column_id)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:154:** +1
```cpp
if (_can_fast_execute && fast_execute(context, block,
result_column_id)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:157:** +1, including nesting penalty of
0, nesting level increased to 1
```cpp
if (children().size() == 1 ||
!_all_child_is_compound_and_not_const()) {
^
```
**be/src/vec/exprs/vcompound_pred.h:163:** +1, including nesting penalty of
0, nesting level increased to 1
```cpp
RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
^
```
**be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/vec/exprs/vcompound_pred.h:163:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
^
```
**be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/vec/exprs/vcompound_pred.h:175:** +1, including nesting penalty of
0, nesting level increased to 1
```cpp
if (lhs_is_nullable) {
^
```
**be/src/vec/exprs/vcompound_pred.h:189:** nesting level increased to 1
```cpp
auto get_rhs_colum = [&]() {
^
```
**be/src/vec/exprs/vcompound_pred.h:190:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
if (rhs_id == -1) {
^
```
**be/src/vec/exprs/vcompound_pred.h:191:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
RETURN_IF_ERROR(_children[1]->execute(context, block,
&rhs_id));
^
```
**be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/vec/exprs/vcompound_pred.h:191:** +4, including nesting penalty of
3, nesting level increased to 4
```cpp
RETURN_IF_ERROR(_children[1]->execute(context, block,
&rhs_id));
^
```
**be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/vec/exprs/vcompound_pred.h:201:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
if (rhs_is_nullable) {
^
```
**be/src/vec/exprs/vcompound_pred.h:209:** nesting level increased to 1
```cpp
auto return_result_column_id = [&](ColumnPtr res_column, int res_id)
-> int {
^
```
**be/src/vec/exprs/vcompound_pred.h:210:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
if (result_is_nullable && !res_column->is_nullable()) {
^
```
**be/src/vec/exprs/vcompound_pred.h:210:** +1
```cpp
if (result_is_nullable && !res_column->is_nullable()) {
^
```
**be/src/vec/exprs/vcompound_pred.h:219:** nesting level increased to 1
```cpp
auto create_null_map_column = [&](ColumnPtr& null_map_column,
^
```
**be/src/vec/exprs/vcompound_pred.h:221:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
if (null_map_data == nullptr) {
^
```
**be/src/vec/exprs/vcompound_pred.h:230:** nesting level increased to 1
```cpp
auto vector_vector_null = [&]<bool is_and_op>() {
^
```
**be/src/vec/exprs/vcompound_pred.h:240:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
if constexpr (is_and_op) {
^
```
**be/src/vec/exprs/vcompound_pred.h:241:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
for (size_t i = 0; i < size; ++i) {
^
```
**be/src/vec/exprs/vcompound_pred.h:246:** +1, nesting level increased to 2
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:247:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
for (size_t i = 0; i < size; ++i) {
^
```
**be/src/vec/exprs/vcompound_pred.h:260:** +1, including nesting penalty of
0, nesting level increased to 1
```cpp
if (_op == TExprOpcode::COMPOUND_AND) {
^
```
**be/src/vec/exprs/vcompound_pred.h:263:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:263:** +1
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:263:** +1
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:263:** +1
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:266:** +1, nesting level increased to 2
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:267:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
RETURN_IF_ERROR(get_rhs_colum());
^
```
**be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/vec/exprs/vcompound_pred.h:267:** +4, including nesting penalty of
3, nesting level increased to 4
```cpp
RETURN_IF_ERROR(get_rhs_colum());
^
```
**be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/vec/exprs/vcompound_pred.h:269:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
if ((lhs_all_true && !lhs_is_nullable) || //not null
column
^
```
**be/src/vec/exprs/vcompound_pred.h:269:** +1
```cpp
if ((lhs_all_true && !lhs_is_nullable) || //not null
column
^
```
**be/src/vec/exprs/vcompound_pred.h:269:** +1
```cpp
if ((lhs_all_true && !lhs_is_nullable) || //not null
column
^
```
**be/src/vec/exprs/vcompound_pred.h:270:** +1
```cpp
(lhs_all_true && lhs_all_is_not_null)) { //nullable
column
^
```
**be/src/vec/exprs/vcompound_pred.h:273:** +1, nesting level increased to 3
```cpp
} else if ((rhs_all_false && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:273:** +1
```cpp
} else if ((rhs_all_false && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:273:** +1
```cpp
} else if ((rhs_all_false && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:274:** +1
```cpp
(rhs_all_false && rhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:277:** +1, nesting level increased to 3
```cpp
} else if ((rhs_all_true && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:277:** +1
```cpp
} else if ((rhs_all_true && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:277:** +1
```cpp
} else if ((rhs_all_true && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:278:** +1
```cpp
(rhs_all_true && rhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:281:** +1, nesting level increased to 3
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:282:** +4, including nesting penalty of
3, nesting level increased to 4
```cpp
if (!result_is_nullable) {
^
```
**be/src/vec/exprs/vcompound_pred.h:284:** +5, including nesting penalty of
4, nesting level increased to 5
```cpp
for (size_t i = 0; i < size; i++) {
^
```
**be/src/vec/exprs/vcompound_pred.h:287:** +1, nesting level increased to 4
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:292:** +1, nesting level increased to 1
```cpp
} else if (_op == TExprOpcode::COMPOUND_OR) {
^
```
**be/src/vec/exprs/vcompound_pred.h:295:** +2, including nesting penalty of
1, nesting level increased to 2
```cpp
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:295:** +1
```cpp
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:295:** +1
```cpp
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:295:** +1
```cpp
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:298:** +1, nesting level increased to 2
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:299:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
RETURN_IF_ERROR(get_rhs_colum());
^
```
**be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/vec/exprs/vcompound_pred.h:299:** +4, including nesting penalty of
3, nesting level increased to 4
```cpp
RETURN_IF_ERROR(get_rhs_colum());
^
```
**be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/vec/exprs/vcompound_pred.h:300:** +3, including nesting penalty of
2, nesting level increased to 3
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:300:** +1
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:300:** +1
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:300:** +1
```cpp
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:303:** +1, nesting level increased to 3
```cpp
} else if ((rhs_all_true && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:303:** +1
```cpp
} else if ((rhs_all_true && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:303:** +1
```cpp
} else if ((rhs_all_true && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:304:** +1
```cpp
(rhs_all_true && rhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:307:** +1, nesting level increased to 3
```cpp
} else if ((rhs_all_false && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:307:** +1
```cpp
} else if ((rhs_all_false && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:307:** +1
```cpp
} else if ((rhs_all_false && !rhs_is_nullable) ||
^
```
**be/src/vec/exprs/vcompound_pred.h:308:** +1
```cpp
(rhs_all_false && rhs_all_is_not_null)) {
^
```
**be/src/vec/exprs/vcompound_pred.h:311:** +1, nesting level increased to 3
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:312:** +4, including nesting penalty of
3, nesting level increased to 4
```cpp
if (!result_is_nullable) {
^
```
**be/src/vec/exprs/vcompound_pred.h:314:** +5, including nesting penalty of
4, nesting level increased to 5
```cpp
for (size_t i = 0; i < size; i++) {
^
```
**be/src/vec/exprs/vcompound_pred.h:317:** +1, nesting level increased to 4
```cpp
} else {
^
```
**be/src/vec/exprs/vcompound_pred.h:322:** +1, nesting level increased to 1
```cpp
} else {
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]