alamb commented on code in PR #8434:
URL: https://github.com/apache/arrow-datafusion/pull/8434#discussion_r1420793866
##########
datafusion/physical-expr/src/string_expressions.rs:
##########
@@ -346,44 +302,95 @@ pub fn lower(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
handle(args, |string| string.to_ascii_lowercase(), "lower")
}
-/// Removes the longest string containing only characters in characters (a
space by default) from the start of string.
-/// ltrim('zzzytest', 'xyz') = 'test'
-pub fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+enum TrimType {
+ Left,
+ Right,
+ Both,
+}
+
+impl Display for TrimType {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ match self {
+ TrimType::Left => write!(f, "ltrim"),
+ TrimType::Right => write!(f, "rtrim"),
+ TrimType::Both => write!(f, "btrim"),
+ }
+ }
+}
+
+fn general_trim<T: OffsetSizeTrait>(
+ args: &[ArrayRef],
+ trim_type: TrimType,
+) -> Result<ArrayRef> {
+ let func = match trim_type {
+ TrimType::Left => |input, pattern: &str| {
+ let pattern = pattern.chars().collect::<Vec<char>>();
+ str::trim_start_matches::<&[char]>(input, pattern.as_ref())
+ },
+ TrimType::Right => |input, pattern: &str| {
+ let pattern = pattern.chars().collect::<Vec<char>>();
+ str::trim_end_matches::<&[char]>(input, pattern.as_ref())
+ },
+ TrimType::Both => |input, pattern: &str| {
+ let pattern = pattern.chars().collect::<Vec<char>>();
+ str::trim_end_matches::<&[char]>(
+ str::trim_start_matches::<&[char]>(input, pattern.as_ref()),
+ pattern.as_ref(),
+ )
+ },
+ };
+
+ let string_array = as_generic_string_array::<T>(&args[0])?;
+
match args.len() {
1 => {
- let string_array = as_generic_string_array::<T>(&args[0])?;
-
let result = string_array
.iter()
- .map(|string| string.map(|string: &str|
string.trim_start_matches(' ')))
+ .map(|string| string.map(|string: &str| func(string, " ")))
.collect::<GenericStringArray<T>>();
Ok(Arc::new(result) as ArrayRef)
}
2 => {
- let string_array = as_generic_string_array::<T>(&args[0])?;
let characters_array = as_generic_string_array::<T>(&args[1])?;
let result = string_array
.iter()
.zip(characters_array.iter())
.map(|(string, characters)| match (string, characters) {
- (Some(string), Some(characters)) => {
- let chars: Vec<char> = characters.chars().collect();
- Some(string.trim_start_matches(&chars[..]))
- }
+ (Some(string), Some(characters)) => Some(func(string,
characters)),
_ => None,
})
.collect::<GenericStringArray<T>>();
Ok(Arc::new(result) as ArrayRef)
}
- other => internal_err!(
- "ltrim was called with {other} arguments. It requires at least 1
and at most 2."
- ),
+ other => {
+ internal_err!(
+ "{trim_type} was called with {other} arguments. It requires at
least 1 and at most 2."
+ )
+ }
}
}
+/// Removes the longest string containing only characters in characters (a
space by default) from the start and end of string.
+/// btrim('xyxtrimyyx', 'xyz') = 'trim'
Review Comment:
It might help to explain what `characters` is here -- specifically, that it
is the optional second argument (`args[1]`), used when present
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]