tustvold commented on code in PR #4613:
URL: https://github.com/apache/arrow-rs/pull/4613#discussion_r1280464293


##########
arrow-ord/src/sort.rs:
##########
@@ -834,200 +608,6 @@ where
     (values_indices, run_values)
 }
 
-/// Sort strings
-fn sort_string<Offset: OffsetSizeTrait>(
-    values: &dyn Array,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> UInt32Array {
-    let values = values
-        .as_any()
-        .downcast_ref::<GenericStringArray<Offset>>()
-        .unwrap();
-
-    sort_string_helper(
-        values,
-        value_indices,
-        null_indices,
-        options,
-        limit,
-        |array, idx| array.value(idx as usize),
-    )
-}
-
-/// shared implementation between dictionary encoded and plain string arrays
-#[inline]
-fn sort_string_helper<'a, A: Array, F>(
-    values: &'a A,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-    value_fn: F,
-) -> UInt32Array
-where
-    F: Fn(&'a A, u32) -> &str,
-{
-    let mut valids = value_indices
-        .into_iter()
-        .map(|index| (index, value_fn(values, index)))
-        .collect::<Vec<(u32, &str)>>();
-    let mut nulls = null_indices;
-    let descending = options.descending;
-    let mut len = values.len();
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-
-    sort_valids(descending, &mut valids, len, cmp);
-    // collect the order of valid tuplies
-    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| 
tuple.0).collect();
-
-    if options.nulls_first {
-        nulls.append(&mut valid_indices);
-        nulls.truncate(len);
-        UInt32Array::from(nulls)
-    } else {
-        // no need to sort nulls as they are in the correct order already
-        valid_indices.append(&mut nulls);
-        valid_indices.truncate(len);
-        UInt32Array::from(valid_indices)
-    }
-}
-
-fn sort_list<S>(
-    values: &dyn Array,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> UInt32Array
-where
-    S: OffsetSizeTrait,
-{
-    sort_list_inner::<S>(values, value_indices, null_indices, options, limit)
-}
-
-fn sort_list_inner<S>(
-    values: &dyn Array,
-    value_indices: Vec<u32>,
-    mut null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> UInt32Array
-where
-    S: OffsetSizeTrait,
-{
-    let mut valids: Vec<(u32, ArrayRef)> = values
-        .as_any()
-        .downcast_ref::<FixedSizeListArray>()
-        .map_or_else(
-            || {
-                let values = as_generic_list_array::<S>(values);
-                value_indices
-                    .iter()
-                    .copied()
-                    .map(|index| (index, values.value(index as usize)))
-                    .collect()
-            },
-            |values| {
-                value_indices
-                    .iter()
-                    .copied()
-                    .map(|index| (index, values.value(index as usize)))
-                    .collect()
-            },
-        );
-
-    let mut len = values.len();
-    let descending = options.descending;
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-    sort_valids_array(descending, &mut valids, &mut null_indices, len);
-
-    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| 
tuple.0).collect();
-    if options.nulls_first {
-        null_indices.append(&mut valid_indices);
-        null_indices.truncate(len);
-        UInt32Array::from(null_indices)
-    } else {
-        valid_indices.append(&mut null_indices);
-        valid_indices.truncate(len);
-        UInt32Array::from(valid_indices)
-    }
-}
-
-fn sort_binary<S>(
-    values: &dyn Array,
-    value_indices: Vec<u32>,
-    mut null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> UInt32Array
-where
-    S: OffsetSizeTrait,
-{
-    let mut valids: Vec<(u32, &[u8])> = values
-        .as_any()
-        .downcast_ref::<FixedSizeBinaryArray>()
-        .map_or_else(
-            || {
-                let values = as_generic_binary_array::<S>(values);
-                value_indices
-                    .iter()
-                    .copied()
-                    .map(|index| (index, values.value(index as usize)))
-                    .collect()
-            },
-            |values| {
-                value_indices
-                    .iter()
-                    .copied()
-                    .map(|index| (index, values.value(index as usize)))
-                    .collect()
-            },
-        );
-
-    let mut len = values.len();
-    let descending = options.descending;
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-
-    sort_valids(descending, &mut valids, len, cmp);
-
-    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| 
tuple.0).collect();
-    if options.nulls_first {
-        null_indices.append(&mut valid_indices);
-        null_indices.truncate(len);
-        UInt32Array::from(null_indices)
-    } else {
-        valid_indices.append(&mut null_indices);
-        valid_indices.truncate(len);
-        UInt32Array::from(valid_indices)
-    }
-}
-
-/// Compare two `Array`s based on the ordering defined in [build_compare]
-fn cmp_array(a: &dyn Array, b: &dyn Array) -> Ordering {
-    let cmp_op = build_compare(a, b).unwrap();

Review Comment:
   This was getting called per array comparison, doing all this dispatch and 
allocation logic every time! The performance would have been catastrophic 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to