HaoYang670 commented on code in PR #1832: URL: https://github.com/apache/arrow-rs/pull/1832#discussion_r894261742
########## arrow/src/compute/kernels/substring.rs: ########## @@ -182,24 +182,66 @@ pub fn substring_by_char<OffsetSize: OffsetSizeTrait>( start: i64, length: Option<u64>, ) -> Result<GenericStringArray<OffsetSize>> { - Ok(array - .iter() - .map(|val| { - val.map(|val| { - let char_count = val.chars().count(); - let start = if start >= 0 { - start.to_usize().unwrap().min(char_count) - } else { - char_count - (-start).to_usize().unwrap().min(char_count) - }; - let length = length.map_or(char_count - start, |length| { - length.to_usize().unwrap().min(char_count - start) - }); + let mut vals = BufferBuilder::<u8>::new(array.value_data().len()); + let mut offsets = BufferBuilder::<OffsetSize>::new(array.len() + 1); + offsets.append(OffsetSize::zero()); + let length = length.map(|len| len.to_usize().unwrap()); + + array.iter().for_each(|val| { + if let Some(val) = val { + let char_count = val.chars().count(); + let start = if start >= 0 { + start.to_usize().unwrap() + } else { + char_count - (-start).to_usize().unwrap().min(char_count) + }; + let (start_offset, end_offset) = get_start_end_offset(val, start, length); + vals.append_slice(&val.as_bytes()[start_offset..end_offset]); + } + offsets.append(OffsetSize::from_usize(vals.len()).unwrap()); + }); + let data = unsafe { + ArrayData::new_unchecked( + GenericStringArray::<OffsetSize>::get_data_type(), + array.len(), + None, + array + .data_ref() + .null_buffer() + .map(|b| b.bit_slice(array.offset(), array.len())), + 0, + vec![offsets.finish(), vals.finish()], + vec![], + ) + }; + Ok(GenericStringArray::<OffsetSize>::from(data)) Review Comment: The `Ok()` here is just to make this function to be consistent with `substring`. It is meaningless because this function never return an error (Actually, I don't like `unwrap` or `panic` in a function returning `Result`. But the code would be much unclear is I remove `unwrap` (lots of `ok_or(...)`)). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org