Dandandan commented on a change in pull request #9654:
URL: https://github.com/apache/arrow/pull/9654#discussion_r589762868



##########
File path: rust/datafusion/src/physical_plan/string_expressions.rs
##########
@@ -794,85 +525,57 @@ pub fn rtrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     }
 }
 
-/// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).)
-/// substr('alphabet', 3) = 'phabet'
-/// substr('alphabet', 3, 2) = 'ph'
-pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let start_array = downcast_arg!(args[1], "start", Int64Array);
+/// Splits string at occurrences of delimiter and returns the n'th field (counting from one).
+/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def'
+pub fn split_part<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let string_array = downcast_string_arg!(args[0], "string", T);
+    let delimiter_array = downcast_string_arg!(args[1], "delimiter", T);
+    let n_array = downcast_arg!(args[2], "n", Int64Array);
 
-            let result = string_array
-                .iter()
-                .zip(start_array.iter())
-                .map(|(string, start)| match (string, start) {
-                    (None, _) => None,
-                    (_, None) => None,
-                    (Some(string), Some(start)) => {
-                        if start <= 0 {
-                            Some(string.to_string())
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            let start_pos = start as usize - 1;
-                            if graphemes.len() < start_pos {
-                                Some("".to_string())
-                            } else {
-                                Some(graphemes[start_pos..].concat())
-                            }
-                        }
+    let result = string_array
+        .iter()
+        .zip(delimiter_array.iter())
+        .zip(n_array.iter())
+        .map(|((string, delimiter), n)| match (string, delimiter, n) {
+            (None, _, _) => Ok(None),
+            (_, None, _) => Ok(None),
+            (_, _, None) => Ok(None),
+            (Some(string), Some(delimiter), Some(n)) => {
+                if n <= 0 {
+                    Err(DataFusionError::Execution(
+                        "field position must be greater than zero".to_string(),
+                    ))
+                } else {
+                    let split_string: Vec<&str> = string.split(delimiter).collect();
+                    match split_string.get(n as usize - 1) {
+                        Some(s) => Ok(Some(*s)),
+                        None => Ok(Some("")),
                     }
-                })
-                .collect::<GenericStringArray<T>>();
+                }
+            }
+        })
+        .collect::<Result<GenericStringArray<T>>>()?;
 
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        3 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let start_array = downcast_arg!(args[1], "start", Int64Array);
-            let count_array = downcast_arg!(args[2], "count", Int64Array);
+    Ok(Arc::new(result) as ArrayRef)
+}
 
-            let result = string_array
-                .iter()
-                .zip(start_array.iter())
-                .zip(count_array.iter())
-                .map(|((string, start), count)| match (string, start, count) {
-                    (None, _, _) => Ok(None),
-                    (_, None, _) => Ok(None),
-                    (_, _, None) => Ok(None),
-                    (Some(string), Some(start), Some(count)) => {
-                        if count < 0 {
-                            Err(DataFusionError::Execution(
-                                "negative substring length not allowed".to_string(),
-                            ))
-                        } else if start <= 0 {
-                            Ok(Some(string.to_string()))
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            let start_pos = start as usize - 1;
-                            let count_usize = count as usize;
-                            if graphemes.len() < start_pos {
-                                Ok(Some("".to_string()))
-                            } else if graphemes.len() < start_pos + count_usize {
-                                Ok(Some(graphemes[start_pos..].concat()))
-                            } else {
-                                Ok(Some(
-                                    graphemes[start_pos..start_pos + count_usize]
-                                        .concat(),
-                                ))
-                            }
-                        }
-                    }
-                })
-                .collect::<Result<GenericStringArray<T>>>()?;
+/// Returns true if string starts with prefix.
+/// starts_with('alphabet', 'alph') = 't'
+pub fn starts_with<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let string_array = downcast_string_arg!(args[0], "string", T);
+    let prefix_array = downcast_string_arg!(args[1], "prefix", T);
 
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "substr was called with {} arguments. It requires 2 or 3.",
-            other
-        ))),
-    }
+    let result = string_array
+        .iter()
+        .zip(prefix_array.iter())
+        .map(|(string, prefix)| match (string, prefix) {
+            (None, _) => None,
+            (_, None) => None,

Review comment:
       Same as above




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to