Dandandan commented on a change in pull request #24:
URL: https://github.com/apache/arrow-datafusion/pull/24#discussion_r620643551
##########
File path: datafusion/src/physical_plan/hash_join.rs
##########
@@ -781,58 +846,140 @@ pub fn create_hashes<'a>(
random_state: &RandomState,
hashes_buffer: &'a mut Vec<u64>,
) -> Result<&'a mut Vec<u64>> {
+ // combine hashes with `combine_hashes` if we have more than 1 column
+ let multi_col = arrays.len() > 1;
+
for col in arrays {
match col.data_type() {
DataType::UInt8 => {
- hash_array!(UInt8Array, col, u8, hashes_buffer, random_state);
+ hash_array_primitive!(
+ UInt8Array,
+ col,
+ u8,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::UInt16 => {
- hash_array!(UInt16Array, col, u16, hashes_buffer,
random_state);
+ hash_array_primitive!(
+ UInt16Array,
+ col,
+ u16,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::UInt32 => {
- hash_array!(UInt32Array, col, u32, hashes_buffer,
random_state);
+ hash_array_primitive!(
+ UInt32Array,
+ col,
+ u32,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::UInt64 => {
- hash_array!(UInt64Array, col, u64, hashes_buffer,
random_state);
+ hash_array_primitive!(
+ UInt64Array,
+ col,
+ u64,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::Int8 => {
- hash_array!(Int8Array, col, i8, hashes_buffer, random_state);
+ hash_array_primitive!(
+ Int8Array,
+ col,
+ i8,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::Int16 => {
- hash_array!(Int16Array, col, i16, hashes_buffer, random_state);
+ hash_array_primitive!(
+ Int16Array,
+ col,
+ i16,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::Int32 => {
- hash_array!(Int32Array, col, i32, hashes_buffer, random_state);
+ hash_array_primitive!(
+ Int32Array,
+ col,
+ i32,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::Int64 => {
- hash_array!(Int64Array, col, i64, hashes_buffer, random_state);
+ hash_array_primitive!(
+ Int64Array,
+ col,
+ i64,
+ hashes_buffer,
+ random_state,
+ multi_col
+ );
}
DataType::Timestamp(TimeUnit::Microsecond, None) => {
- hash_array!(
+ hash_array_primitive!(
TimestampMicrosecondArray,
col,
i64,
hashes_buffer,
- random_state
+ random_state,
+ multi_col
);
}
DataType::Timestamp(TimeUnit::Nanosecond, None) => {
- hash_array!(
+ hash_array_primitive!(
TimestampNanosecondArray,
col,
i64,
hashes_buffer,
- random_state
+ random_state,
+ multi_col
);
}
DataType::Boolean => {
- hash_array!(BooleanArray, col, u8, hashes_buffer,
random_state);
+ hash_array_primitive!(
Review comment:
I am wondering, are the bytes not guaranteed to be the same for the same
bitmaps?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]