comphead commented on code in PR #19551:
URL: https://github.com/apache/datafusion/pull/19551#discussion_r2651610456


##########
datafusion/functions/benches/trim.rs:
##########
@@ -48,28 +48,58 @@ impl fmt::Display for StringArrayType {
     }
 }
 
-/// returns an array of strings, and `characters` as a ScalarValue
-pub fn create_string_array_and_characters(
+#[derive(Clone, Copy)]
+pub enum TrimType {
+    Ltrim,
+    Rtrim,
+    Btrim,
+}
+
+impl fmt::Display for TrimType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            TrimType::Ltrim => f.write_str("ltrim"),
+            TrimType::Rtrim => f.write_str("rtrim"),
+            TrimType::Btrim => f.write_str("btrim"),
+        }
+    }
+}
+
+/// Returns an array of strings with trim characters positioned according to 
trim type,
+/// and `characters` as a ScalarValue.
+///
+/// For ltrim: trim characters are at the start (prefix)
+/// For rtrim: trim characters are at the end (suffix)
+/// For btrim: trim characters are at both start and end
+fn create_string_array_and_characters(
     size: usize,
     characters: &str,
     trimmed: &str,
     remaining_len: usize,
     string_array_type: StringArrayType,
+    trim_type: TrimType,
 ) -> (ArrayRef, ScalarValue) {
     let rng = &mut StdRng::seed_from_u64(42);
 
     // Create `size` rows:
     //   - 10% rows will be `None`
-    //   - Other 90% will be strings with same `remaining_len` lengths
-    // We will build the string array on it later.
+    //   - Other 90% will be strings with `remaining_len` content length
     let string_iter = (0..size).map(|_| {
         if rng.random::<f32>() < 0.1 {
             None
         } else {
-            let mut value = trimmed.as_bytes().to_vec();
-            let generated = rng.sample_iter(&Alphanumeric).take(remaining_len);
-            value.extend(generated);
-            Some(String::from_utf8(value).unwrap())
+            let content: String = rng
+                .sample_iter(&Alphanumeric)
+                .take(remaining_len)
+                .map(char::from)
+                .collect();
+
+            let value = match trim_type {
+                TrimType::Ltrim => format!("{trimmed}{content}"),
+                TrimType::Rtrim => format!("{content}{trimmed}"),
+                TrimType::Btrim => format!("{trimmed}{content}{trimmed}"),
+            };

Review Comment:
   ```suggestion
   let value = {
       let (prefix, suffix) = match trim_type {
           TrimType::Ltrim => (trimmed, ""),
           TrimType::Rtrim => ("", trimmed),
           TrimType::Btrim => (trimmed, trimmed),
       };
   
       let mut s = String::with_capacity(
           prefix.len() + content.len() + suffix.len()
       );
   
       s.push_str(prefix);
       s.push_str(content);
       s.push_str(suffix);
       s
   };
   ```
   
   Slightly more verbose but less string allocations on concats



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to