Dandandan commented on code in PR #16380: URL: https://github.com/apache/datafusion/pull/16380#discussion_r2142059305
##########
datafusion/physical-plan/src/joins/hash_join.rs:
##########
@@ -991,52 +998,72 @@ async fn collect_left_input(
     let mut hashmap = JoinHashMap::with_capacity(num_rows);
     let mut hashes_buffer = Vec::new();
-    let mut offset = 0;
 
     // Updating hashmap starting from the last batch
     let batches_iter = batches.iter().rev();
-    for batch in batches_iter.clone() {
+    let mut batch_indices: Vec<(usize, usize)> = Vec::new();
+    let mut batch_row_total = usize::default();
+
+    for (batch_id, batch) in batches_iter.enumerate().clone() {
+        let batch_num_rows = batch.num_rows();
+
         hashes_buffer.clear();
         hashes_buffer.resize(batch.num_rows(), 0);
         update_hash(
             &on_left,
             batch,
             &mut hashmap,
-            offset,
+            batch_row_total, // this is our offset
             &random_state,
             &mut hashes_buffer,
             0,
             true,
         )?;
-        offset += batch.num_rows();
+
+        for row_id in 0..batch_num_rows {

Review Comment:
this can use `extend`

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org