jackwener commented on code in PR #4377:
URL: https://github.com/apache/arrow-datafusion/pull/4377#discussion_r1032930615


##########
datafusion/core/src/physical_plan/joins/hash_join.rs:
##########
@@ -1441,44 +1181,147 @@ fn equal_rows(
     err.unwrap_or(Ok(res))
 }
 
-// Produces a batch for left-side rows that have/have not been matched during 
the whole join
-fn produce_from_matched(
-    visited_left_side: &BooleanBufferBuilder,
-    schema: &SchemaRef,
-    column_indices: &[ColumnIndex],
-    left_data: &JoinLeftData,
-    unmatched: bool,
-) -> ArrowResult<RecordBatch> {
-    let indices = if unmatched {
-        UInt64Array::from_iter_values(
-            (0..visited_left_side.len())
-                .filter_map(|v| (!visited_left_side.get_bit(v)).then_some(v as 
u64)),
-        )
+// The input is the matched indices for left and right.
+// Adjust the indices according to the join type
+fn adjust_indices_by_join_type(
+    left_indices: UInt64Array,
+    right_indices: UInt32Array,
+    count_right_batch: usize,
+    join_type: JoinType,
+) -> (UInt64Array, UInt32Array) {
+    match join_type {
+        JoinType::Inner => {
+            // matched
+            (left_indices, right_indices)
+        }
+        JoinType::Left => {
+            // matched
+            (left_indices, right_indices)
+            // unmatched left row will be produced in the end of loop, and it 
has been set in the left visited bitmap
+        }
+        JoinType::Right | JoinType::Full => {
+            // matched
+            // unmatched right row will be produced in this batch
+            let right_null_indices = get_anti_indices(count_right_batch, 
&right_indices);
+            // combine the matched and unmatched right result together
+            append_right_indices(left_indices, right_indices, 
right_null_indices)
+        }
+        JoinType::RightSemi => {
+            // need to remove the duplicated record in the right side
+            let right_indices = get_semi_indices(count_right_batch, 
&right_indices);
+            // the left_indices will not be used later for the `right semi` 
join
+            (left_indices, right_indices)
+        }
+        JoinType::RightAnti => {
+            // need to remove the duplicated record in the right side
+            // get the anti index for the right side
+            let right_indices = get_anti_indices(count_right_batch, 
&right_indices);
+            // the left_indices will not be used later for the `right anti` 
join
+            (left_indices, right_indices)
+        }
+        JoinType::LeftSemi | JoinType::LeftAnti => {
+            // matched or unmatched left row will be produced in the end of 
loop
+            (
+                UInt64Array::from_iter_values(vec![]),
+                UInt32Array::from_iter_values(vec![]),

Review Comment:
   🤔 I don't understand the purpose of this code.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to