This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to tag 26.0.0 in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
commit fb2b49c202de28a166cab4fbbe84e0024ed706d4 Author: Andrew Lamb <[email protected]> AuthorDate: Mon Oct 31 13:26:15 2022 -0400 Fix ignored limit on `lexsort_to_indices` (#2991) * Fix ignored limit on lexsort_to_indices * Update comments * Update arrow/src/compute/kernels/sort.rs Co-authored-by: Batuhan Taskaya <[email protected]> Co-authored-by: Batuhan Taskaya <[email protected]> --- arrow/src/compute/kernels/sort.rs | 40 ++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index b29762264..a10e674ac 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -950,7 +950,7 @@ pub fn lexsort_to_indices( }); Ok(UInt32Array::from_iter_values( - value_indices.iter().map(|i| *i as u32), + value_indices.iter().take(len).map(|i| *i as u32), )) } @@ -1422,6 +1422,18 @@ mod tests { } } + /// slice all arrays in expected_output to offset/length + fn slice_arrays( + expected_output: Vec<ArrayRef>, + offset: usize, + length: usize, + ) -> Vec<ArrayRef> { + expected_output + .into_iter() + .map(|array| array.slice(offset, length)) + .collect() + } + fn test_sort_binary_arrays( data: Vec<Option<Vec<u8>>>, options: Option<SortOptions>, @@ -3439,8 +3451,10 @@ mod tests { Some(2), Some(17), ])) as ArrayRef]; - test_lex_sort_arrays(input.clone(), expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2)); + // Explicitly test a limit on the sort as a demonstration let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![ Some(-1), Some(0), @@ -3519,7 +3533,8 @@ mod tests { Some(-2), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); // test mix of string and in64 with option let input = vec![ @@ -3562,7 +3577,8 @@ mod tests { Some("7"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3)); // test sort with nulls first let input = vec![ @@ -3605,7 +3621,8 @@ mod tests { Some("world"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1)); // test sort with nulls last let input = vec![ @@ -3648,7 +3665,8 @@ mod tests { None, ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); // test sort with opposite options let input = vec![ @@ -3695,7 +3713,15 @@ mod tests { Some("foo"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays( + input.clone(), + slice_arrays(expected.clone(), 0, 5), + Some(5), + ); + + // Limiting by more rows than present is ok + test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10)); } #[test]
