friendlymatthew commented on code in PR #8838:
URL: https://github.com/apache/arrow-rs/pull/8838#discussion_r2524820204
##########
arrow-ord/src/ord.rs:
##########
@@ -296,6 +296,72 @@ fn compare_struct(
Ok(f)
}
+fn compare_union(
+ left: &dyn Array,
+ right: &dyn Array,
+ opts: SortOptions,
+) -> Result<DynComparator, ArrowError> {
+ let left = left.as_union();
+ let right = right.as_union();
+
+ let (left_fields, left_mode) = match left.data_type() {
+ DataType::Union(fields, mode) => (fields, mode),
+ _ => unreachable!(),
+ };
+ let (right_fields, right_mode) = match right.data_type() {
+ DataType::Union(fields, mode) => (fields, mode),
+ _ => unreachable!(),
+ };
+
+ if left_fields != right_fields || left_mode != right_mode {
+ return Err(ArrowError::InvalidArgumentError(
+ "Cannot compare UnionArrays with different fields or
modes".to_string(),
+ ));
+ }
+
+ let c_opts = child_opts(opts);
+
+ let max_type_id = left_fields.iter().map(|(id, _)| id).max().unwrap_or(0);
+ let mut field_comparators: Vec<Option<DynComparator>> =
+ Vec::with_capacity((max_type_id + 1) as usize);
+ field_comparators.resize_with((max_type_id + 1) as usize, || None);
Review Comment:
The comparison closure `f` looks up each comparator in a direct-indexed Vec,
where the type id serves as the array index, giving us O(1) lookups.
Although this can create sparse vectors when users assign non-contiguous
type ids (worst case: 128 slots for 1 field), I chose this approach over
a hash map or a linear scan.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]