github-actions[bot] commented on code in PR #41385: URL: https://github.com/apache/doris/pull/41385#discussion_r1777967976
########## be/src/vec/exprs/vexpr.cpp: ########## @@ -602,80 +602,134 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string& return Status::OK(); } -bool VExpr::fast_execute(Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count, const std::string& function_name) { - if (!_enable_inverted_index_query) { - return false; +Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function, Review Comment: warning: function '_evaluate_inverted_index' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function, ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exprs/vexpr.cpp:604:** 106 lines including whitespace and comments (threshold 80) ```cpp Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function, ^ ``` </details> ########## be/src/vec/exprs/vcompound_pred.h: ########## @@ -53,7 +54,107 @@ class VCompoundPred : public VectorizedFnCall { const std::string& expr_name() const override { return _expr_name; } + Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override { Review Comment: warning: function 'evaluate_inverted_index' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override { ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exprs/vcompound_pred.h:56:** 95 lines including whitespace and comments (threshold 80) ```cpp Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override { ^ ``` </details> ########## be/src/vec/functions/match.cpp: ########## @@ -24,85 +24,133 @@ #include "util/debug_points.h" namespace doris::vectorized { +Status FunctionMatchBase::evaluate_inverted_index( + const ColumnsWithTypeAndName& 
arguments, + const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names, + std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows, + segment_v2::InvertedIndexResultBitmap& bitmap_result) const { + DCHECK(arguments.size() == 1); + DCHECK(data_type_with_names.size() == 1); + DCHECK(iterators.size() == 1); + auto* iter = iterators[0]; + auto data_type_with_name = data_type_with_names[0]; + if (iter == nullptr) { + return Status::OK(); + } + const std::string& function_name = get_name(); + + if (function_name == MATCH_PHRASE_FUNCTION || function_name == MATCH_PHRASE_PREFIX_FUNCTION || + function_name == MATCH_PHRASE_EDGE_FUNCTION) { + if (iter->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT && + get_parser_phrase_support_string_from_properties(iter->get_index_properties()) == + INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "phrase queries require setting support_phrase = true"); + } + } + std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); + Field param_value; + arguments[0].column->get(0, param_value); + auto param_type = arguments[0].type->get_type_as_type_descriptor().type; + if (!is_string_type(param_type)) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "arguments for match must be string"); + } + std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr; + RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, &param_value, + query_param)); + if (is_string_type(param_type)) { + auto inverted_index_query_type = get_query_type_from_fn_name(); + RETURN_IF_ERROR( + iter->read_from_inverted_index(data_type_with_name.first, query_param->get_value(), + inverted_index_query_type, num_rows, roaring)); + } else { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "invalid params type for FunctionMatchBase::evaluate_inverted_index {}", 
+ param_type); + } + std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>(); + if (iter->has_null()) { + segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); + null_bitmap = null_bitmap_cache_handle.get_bitmap(); + } + segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); + bitmap_result = result; + bitmap_result.mask_out_null(); + return Status::OK(); +} Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const { ColumnPtr& column_ptr = block.get_by_position(arguments[1]).column; DataTypePtr& type_ptr = block.get_by_position(arguments[1]).type; auto match_query_str = type_ptr->to_string(*column_ptr, 0); std::string column_name = block.get_by_position(arguments[0]).name; - auto match_pred_column_name = - BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str; - if (!block.has(match_pred_column_name)) { - VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name - << ", match_query_str=" << match_query_str; - InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>( - context->get_function_state(FunctionContext::THREAD_LOCAL)); - if (inverted_index_ctx == nullptr) { - inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - } + VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name + << ", match_query_str=" << match_query_str; + InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>( Review Comment: warning: use auto when initializing with a cast to avoid duplicating the type name [modernize-use-auto] ```suggestion auto* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>( ``` ########## be/src/vec/exprs/vcompound_pred.h: ########## @@ -53,7 +54,107 @@ const 
std::string& expr_name() const override { return _expr_name; } + Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override { + segment_v2::InvertedIndexResultBitmap res; + bool all_pass = true; + + switch (_op) { + case TExprOpcode::COMPOUND_OR: { + for (const auto& child : _children) { + if (Status st = child->evaluate_inverted_index(context, segment_num_rows); + !st.ok()) { + LOG(ERROR) << "expr:" << child->expr_name() + << " evaluate_inverted_index error:" << st.to_string(); + all_pass = false; + continue; + } + if (context->get_inverted_index_context()->has_inverted_index_result_for_expr( + child.get())) { + const auto* index_result = + context->get_inverted_index_context() + ->get_inverted_index_result_for_expr(child.get()); + if (res.is_empty()) { + res = *index_result; + } else { + res |= *index_result; + } + if (res.get_data_bitmap()->cardinality() == segment_num_rows) { + break; // Early exit if result is full + } + } else { + all_pass = false; + } + } + break; + } + case TExprOpcode::COMPOUND_AND: { + for (const auto& child : _children) { + if (Status st = child->evaluate_inverted_index(context, segment_num_rows); + !st.ok()) { + LOG(ERROR) << "expr:" << child->expr_name() + << " evaluate_inverted_index error:" << st.to_string(); + all_pass = false; + continue; + } + if (context->get_inverted_index_context()->has_inverted_index_result_for_expr( + child.get())) { + const auto* index_result = + context->get_inverted_index_context() + ->get_inverted_index_result_for_expr(child.get()); + if (res.is_empty()) { + res = *index_result; + } else { + res &= *index_result; + } + + if (res.get_data_bitmap()->isEmpty()) { + break; // Early exit if result is empty + } + } else { + all_pass = false; + } + } + break; + } + case TExprOpcode::COMPOUND_NOT: { + const auto& child = _children[0]; + Status st = child->evaluate_inverted_index(context, segment_num_rows); + if (!st.ok()) { + LOG(ERROR) << "expr:" << child->expr_name() + << " 
evaluate_inverted_index error:" << st.to_string(); + return st; + } + + if (context->get_inverted_index_context()->has_inverted_index_result_for_expr( + child.get())) { + const auto* index_result = + context->get_inverted_index_context()->get_inverted_index_result_for_expr( + child.get()); + roaring::Roaring full_result; + full_result.addRange(0, segment_num_rows); + res = index_result->op_not(&full_result); + } else { + all_pass = false; + } + break; + } + default: + return Status::NotSupported( + "Compound operator must be AND, OR, or NOT to execute with inverted index."); + } + + if (all_pass && !res.is_empty()) { + // set fast_execute when expr evaluated by inverted index correctly + _can_fast_execute = true; + context->get_inverted_index_context()->set_inverted_index_result_for_expr(this, res); + } + return Status::OK(); + } + Status execute(VExprContext* context, Block* block, int* result_column_id) override { Review Comment: warning: function 'execute' has cognitive complexity of 112 (threshold 50) [readability-function-cognitive-complexity] ```cpp Status execute(VExprContext* context, Block* block, int* result_column_id) override { ^ ``` <details> <summary>Additional context</summary> **be/src/vec/exprs/vcompound_pred.h:154:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (_can_fast_execute && fast_execute(context, block, result_column_id)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:154:** +1 ```cpp if (_can_fast_execute && fast_execute(context, block, result_column_id)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:157:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (children().size() == 1 || !_all_child_is_compound_and_not_const()) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:163:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id)); ^ ``` **be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR' ```cpp 
do { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:163:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id)); ^ ``` **be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:175:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (lhs_is_nullable) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:189:** nesting level increased to 1 ```cpp auto get_rhs_colum = [&]() { ^ ``` **be/src/vec/exprs/vcompound_pred.h:190:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (rhs_id == -1) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:191:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(_children[1]->execute(context, block, &rhs_id)); ^ ``` **be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:191:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(_children[1]->execute(context, block, &rhs_id)); ^ ``` **be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:201:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (rhs_is_nullable) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:209:** nesting level increased to 1 ```cpp auto return_result_column_id = [&](ColumnPtr res_column, int res_id) -> int { ^ ``` **be/src/vec/exprs/vcompound_pred.h:210:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (result_is_nullable && !res_column->is_nullable()) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:210:** +1 ```cpp if (result_is_nullable && !res_column->is_nullable()) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:219:** nesting level increased to 1 ```cpp auto create_null_map_column 
= [&](ColumnPtr& null_map_column, ^ ``` **be/src/vec/exprs/vcompound_pred.h:221:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (null_map_data == nullptr) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:230:** nesting level increased to 1 ```cpp auto vector_vector_null = [&]<bool is_and_op>() { ^ ``` **be/src/vec/exprs/vcompound_pred.h:240:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if constexpr (is_and_op) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:241:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < size; ++i) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:246:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:247:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < size; ++i) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:260:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (_op == TExprOpcode::COMPOUND_AND) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:263:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:263:** +1 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:263:** +1 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:263:** +1 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:266:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:267:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(get_rhs_colum()); ^ ``` **be/src/common/status.h:619:** expanded from macro 
'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:267:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(get_rhs_colum()); ^ ``` **be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:269:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if ((lhs_all_true && !lhs_is_nullable) || //not null column ^ ``` **be/src/vec/exprs/vcompound_pred.h:269:** +1 ```cpp if ((lhs_all_true && !lhs_is_nullable) || //not null column ^ ``` **be/src/vec/exprs/vcompound_pred.h:269:** +1 ```cpp if ((lhs_all_true && !lhs_is_nullable) || //not null column ^ ``` **be/src/vec/exprs/vcompound_pred.h:270:** +1 ```cpp (lhs_all_true && lhs_all_is_not_null)) { //nullable column ^ ``` **be/src/vec/exprs/vcompound_pred.h:273:** +1, nesting level increased to 3 ```cpp } else if ((rhs_all_false && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:273:** +1 ```cpp } else if ((rhs_all_false && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:273:** +1 ```cpp } else if ((rhs_all_false && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:274:** +1 ```cpp (rhs_all_false && rhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:277:** +1, nesting level increased to 3 ```cpp } else if ((rhs_all_true && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:277:** +1 ```cpp } else if ((rhs_all_true && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:277:** +1 ```cpp } else if ((rhs_all_true && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:278:** +1 ```cpp (rhs_all_true && rhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:281:** +1, nesting level increased to 3 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:282:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (!result_is_nullable) { 
^ ``` **be/src/vec/exprs/vcompound_pred.h:284:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp for (size_t i = 0; i < size; i++) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:287:** +1, nesting level increased to 4 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:292:** +1, nesting level increased to 1 ```cpp } else if (_op == TExprOpcode::COMPOUND_OR) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:295:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:295:** +1 ```cpp if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:295:** +1 ```cpp if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:295:** +1 ```cpp if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:298:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:299:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_ERROR(get_rhs_colum()); ^ ``` **be/src/common/status.h:619:** expanded from macro 'RETURN_IF_ERROR' ```cpp do { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:299:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_ERROR(get_rhs_colum()); ^ ``` **be/src/common/status.h:621:** expanded from macro 'RETURN_IF_ERROR' ```cpp if (UNLIKELY(!_status_.ok())) { \ ^ ``` **be/src/vec/exprs/vcompound_pred.h:300:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:300:** +1 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` 
**be/src/vec/exprs/vcompound_pred.h:300:** +1 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:300:** +1 ```cpp if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:303:** +1, nesting level increased to 3 ```cpp } else if ((rhs_all_true && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:303:** +1 ```cpp } else if ((rhs_all_true && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:303:** +1 ```cpp } else if ((rhs_all_true && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:304:** +1 ```cpp (rhs_all_true && rhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:307:** +1, nesting level increased to 3 ```cpp } else if ((rhs_all_false && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:307:** +1 ```cpp } else if ((rhs_all_false && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:307:** +1 ```cpp } else if ((rhs_all_false && !rhs_is_nullable) || ^ ``` **be/src/vec/exprs/vcompound_pred.h:308:** +1 ```cpp (rhs_all_false && rhs_all_is_not_null)) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:311:** +1, nesting level increased to 3 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:312:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (!result_is_nullable) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:314:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp for (size_t i = 0; i < size; i++) { ^ ``` **be/src/vec/exprs/vcompound_pred.h:317:** +1, nesting level increased to 4 ```cpp } else { ^ ``` **be/src/vec/exprs/vcompound_pred.h:322:** +1, nesting level increased to 1 ```cpp } else { ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org