Dandandan commented on a change in pull request #9602: URL: https://github.com/apache/arrow/pull/9602#discussion_r593904547
########## File path: rust/arrow/src/compute/kernels/sort.rs ########## @@ -686,90 +815,124 @@ pub fn lexsort_to_indices(columns: &[SortColumn]) -> Result<UInt32Array> { }; let mut value_indices = (0..row_count).collect::<Vec<usize>>(); - value_indices.sort_by(lex_comparator); + let mut len = value_indices.len(); + + if let Some(limit) = limit { + len = limit.min(len); + } + sort_by(&mut value_indices, len, lex_comparator); Ok(UInt32Array::from( - value_indices - .into_iter() - .map(|i| i as u32) + (&value_indices)[0..len] + .iter() + .map(|i| *i as u32) .collect::<Vec<u32>>(), )) } +pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F) +where + F: FnMut(&T, &T) -> Ordering, +{ + let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less); + before.sort_unstable_by(is_less); Review comment: Clear, thanks @sundy-li. Maybe some docs can be added about this? I think it probably is not really a problem, but might be surprising by someone using the kernel. And we might want to see whether we could use a (faster) unstable version of the normal sort kernel as well (I think that could be used by DataFusion as well I think). What do you think @alamb ########## File path: rust/arrow/src/compute/kernels/sort.rs ########## @@ -686,90 +815,124 @@ pub fn lexsort_to_indices(columns: &[SortColumn]) -> Result<UInt32Array> { }; let mut value_indices = (0..row_count).collect::<Vec<usize>>(); - value_indices.sort_by(lex_comparator); + let mut len = value_indices.len(); + + if let Some(limit) = limit { + len = limit.min(len); + } + sort_by(&mut value_indices, len, lex_comparator); Ok(UInt32Array::from( - value_indices - .into_iter() - .map(|i| i as u32) + (&value_indices)[0..len] + .iter() + .map(|i| *i as u32) .collect::<Vec<u32>>(), )) } +pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F) +where + F: FnMut(&T, &T) -> Ordering, +{ + let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less); + before.sort_unstable_by(is_less); Review comment: Clear, thanks @sundy-li. Maybe some docs can be added about this? I think it probably is not really a problem, but might be surprising by someone using the kernel. And we might want to see whether we could use a (faster) unstable version of the normal sort kernel as well (Ithat could be used by DataFusion as well I think). What do you think @alamb ########## File path: rust/arrow/src/compute/kernels/sort.rs ########## @@ -686,90 +815,124 @@ pub fn lexsort_to_indices(columns: &[SortColumn]) -> Result<UInt32Array> { }; let mut value_indices = (0..row_count).collect::<Vec<usize>>(); - value_indices.sort_by(lex_comparator); + let mut len = value_indices.len(); + + if let Some(limit) = limit { + len = limit.min(len); + } + sort_by(&mut value_indices, len, lex_comparator); Ok(UInt32Array::from( - value_indices - .into_iter() - .map(|i| i as u32) + (&value_indices)[0..len] + .iter() + .map(|i| *i as u32) .collect::<Vec<u32>>(), )) } +pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F) +where + F: FnMut(&T, &T) -> Ordering, +{ + let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less); + before.sort_unstable_by(is_less); Review comment: Clear, thanks @sundy-li. Maybe some docs can be added about this? I think it probably is not really a problem, but might be surprising by someone using the kernel. And we might want to see whether we could use a (faster) unstable version of the normal sort kernel as well (that could be used by DataFusion as well I think). What do you think @alamb ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org