Mryange commented on code in PR #59591:
URL: https://github.com/apache/doris/pull/59591#discussion_r2781157051
##########
be/src/pipeline/exec/join/process_hash_table_probe_impl.h:
##########
@@ -598,6 +977,227 @@ Status
ProcessHashTableProbe<JoinOpType>::do_mark_join_conjuncts(vectorized::Blo
}
}
+// Helper struct for ASOF JOIN match context
+// FE guarantees: MATCH_CONDITION left side = probe table columns, right side
= build table columns
+// This is ensured by commute() during join swap in LogicalJoin.swap()
+struct AsofMatchContext {
+ // Materialized results of MATCH_CONDITION expressions, held as raw pointers that this struct never frees
+ // Left side of expression = probe_col (guaranteed by FE)
+ // Right side of expression = build_col (guaranteed by FE)
+ const vectorized::IColumn* probe_col = nullptr; // left side of
MATCH_CONDITION
+ const vectorized::IColumn* build_col = nullptr; // right side of
MATCH_CONDITION
+ const uint8_t* conjunct_filter = nullptr; // pre-evaluated condition
filter
+ bool find_largest = false; // true: find LARGEST match, false: find
SMALLEST match
+ bool is_strict = false; // true for strict inequality (> or <); not read by satisfies() or is_better_match()
+ bool is_right_join = false; // true for ASOF RIGHT JOIN (build-driven); selects which column is_better_match() compares
+
+ // Check if a row satisfies the inequality condition: false when conjunct_filter is null or its entry for row_idx is 0
+ // Also checks for NULL values which should not match anything; the same row_idx indexes conjunct_filter, probe_col and build_col
+ bool satisfies(size_t row_idx) const {
+ if (!conjunct_filter || conjunct_filter[row_idx] == 0) {
+ return false;
+ }
+ // Check if probe or build column value is NULL - NULL comparisons
should not match
+ if (probe_col) {
+ if (probe_col->is_nullable()) { // only then is the cast to ColumnNullable below performed
+ const auto* nullable_col =
+ static_cast<const
vectorized::ColumnNullable*>(probe_col);
+ if (nullable_col->is_null_at(row_idx)) {
+ return false;
+ }
+ }
+ }
+ if (build_col) {
+ if (build_col->is_nullable()) {
+ const auto* nullable_col =
+ static_cast<const
vectorized::ColumnNullable*>(build_col);
+ if (nullable_col->is_null_at(row_idx)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ // Compare two candidate values to determine which is "better" (closer
match)
+ // For LEFT JOIN: compare build values - find best build for each probe
+ // For RIGHT JOIN: compare probe values - find best probe for each build
+ // Returns true if 'current' is better than 'best'; equal values (cmp == 0) and a null comparison column both return false
+ bool is_better_match(size_t current_idx, size_t best_idx) const {
+ // LEFT JOIN: compare build_col (right side of expression)
+ // RIGHT JOIN: compare probe_col (left side of expression)
+ const vectorized::IColumn* cmp_col = is_right_join ? probe_col :
build_col;
+ if (!cmp_col) {
+ return false;
+ }
+ int cmp = cmp_col->compare_at(current_idx, best_idx, *cmp_col, 1); // both rows come from cmp_col itself; trailing 1 is presumably the nan_direction_hint - TODO confirm
+ return find_largest ? (cmp > 0) : (cmp < 0);
+ }
+};
+
+template <int JoinOpType>
+Status
ProcessHashTableProbe<JoinOpType>::do_asof_join_conjuncts(vectorized::Block*
output_block,
+
DorisVector<uint8_t>& visited) {
+ auto row_count = output_block->rows();
+ if (!row_count) {
+ return Status::OK();
+ }
+
+ SCOPED_TIMER(_parent->_non_equal_join_conjuncts_timer);
+ size_t orig_columns = output_block->columns();
+ constexpr bool is_outer_join = JoinOpType == TJoinOp::ASOF_LEFT_OUTER_JOIN
||
+ JoinOpType ==
TJoinOp::ASOF_RIGHT_OUTER_JOIN;
+ constexpr bool is_right_join = JoinOpType ==
TJoinOp::ASOF_RIGHT_INNER_JOIN ||
+ JoinOpType ==
TJoinOp::ASOF_RIGHT_OUTER_JOIN;
+
+ // Initialize match context with basic settings
+ AsofMatchContext ctx;
+ ctx.is_strict = _parent->_shared_state->asof_inequality_is_strict;
+ ctx.is_right_join = is_right_join;
+
+ // FE validates ASOF JOIN must have MATCH_CONDITION, so this is guaranteed
non-empty
+ if (_parent->_other_join_conjuncts.size() != 1) [[unlikely]] {
+ return Status::FatalError("ASOF JOIN requires MATCH_CONDITION, should
be validated by FE");
+ }
+ auto& conjunct = _parent->_other_join_conjuncts[0];
+ if (conjunct == nullptr || conjunct->root() == nullptr ||
+ conjunct->root()->get_num_children() != 2) [[unlikely]] {
+ return Status::FatalError("MATCH_CONDITION must be a binary
comparison, but got " +
+ conjunct->root()->debug_string());
+ }
+
+ auto left_child = conjunct->root()->get_child(0); // probe side
(guaranteed by FE)
+ auto right_child = conjunct->root()->get_child(1); // build side
(guaranteed by FE)
+
+ // Execute both sides to get materialized results
+ int left_tmp_idx = -1, right_tmp_idx = -1;
+ RETURN_IF_ERROR(left_child->execute(conjunct.get(), output_block,
&left_tmp_idx));
+ RETURN_IF_ERROR(right_child->execute(conjunct.get(), output_block,
&right_tmp_idx));
+
+ // Left side = probe_col, Right side = build_col (guaranteed by FE commute)
+ // Materialize both sides of MATCH_CONDITION for unified comparison
+ // FE guarantees: MATCH_CONDITION exists and is a binary comparison
predicate
+ // and left side = probe columns, right side = build columns
+ vectorized::IColumn::Filter conjunct_filter(row_count, 1);
+ DCHECK(left_tmp_idx >= 0 && left_tmp_idx < (int)output_block->columns());
+ DCHECK(right_tmp_idx >= 0 && right_tmp_idx < (int)output_block->columns());
+ vectorized::ColumnPtr probe_result_holder =
+
output_block->get_by_position(left_tmp_idx).column->convert_to_full_column_if_const();
+ ctx.probe_col = probe_result_holder.get();
+ vectorized::ColumnPtr build_result_holder =
+
output_block->get_by_position(right_tmp_idx).column->convert_to_full_column_if_const();
+ ctx.build_col = build_result_holder.get();
+
+ // Determine find_largest based on ASOF JOIN semantics:
+ // - LEFT JOIN + (probe >= build) → find largest build → find_largest=true
+ // - LEFT JOIN + (probe <= build) → find smallest build →
find_largest=false
+ // - RIGHT JOIN + (probe >= build) → find smallest probe →
find_largest=false
+ // - RIGHT JOIN + (probe <= build) → find largest probe → find_largest=true
+ // Unified formula: find_largest = is_greater XOR is_right_join
+ bool is_greater = _parent->_shared_state->asof_inequality_is_greater;
+ ctx.find_largest = is_greater != is_right_join;
+
+ // Evaluate conjunct filter for satisfies() check
+ bool can_be_filter_all = false;
+ RETURN_IF_ERROR(vectorized::VExprContext::execute_conjuncts(
Review Comment:
Please handle `can_be_filter_all` here. (Original comment: 这里处理一下can_be_filter_all)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]