Dandandan commented on a change in pull request #9070: URL: https://github.com/apache/arrow/pull/9070#discussion_r551052983
##########
File path: rust/datafusion/src/physical_plan/hash_join.rs
##########
@@ -48,23 +54,15 @@ use super::{
 use crate::error::{DataFusionError, Result};
 use super::{ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream};
+use crate::physical_plan::coalesce_batches::concat_batches;
 use ahash::RandomState;
 use log::debug;
-// An index of (batch, row) uniquely identifying a row in a part.
-type Index = (usize, usize);
-// A pair (left index, right index)
-// Note that while this is currently equal to `Index`, the `JoinIndex` is semantically different
-// as a left join may issue None indices, in which case
-type JoinIndex = Option<(usize, usize)>;
-// An index of row uniquely identifying a row in a batch
-type RightIndex = Option<u32>;
-
 // Maps ["on" value] -> [list of indices with this key's value]
 // E.g. [1, 2] -> [(0, 3), (1, 6), (0, 8)] indicates that (column1, column2) = [1, 2] is true
 // for rows 3 and 8 from batch 0 and row 6 from batch 1.
-type JoinHashMap = HashMap<Vec<u8>, Vec<Index>, RandomState>;
-type JoinLeftData = Arc<(JoinHashMap, Vec<RecordBatch>)>;

Review comment:
Thanks for the explanation @andygrove 👍

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: users@infra.apache.org