This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 79d40c46ee Minor: improve documentation to StringView trim (#12629)
79d40c46ee is described below
commit 79d40c46ee5dbe718ec4894a482fd5a77a0a7c2f
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Sep 27 06:58:15 2024 -0400
Minor: improve documentation to StringView trim (#12629)
* Minor: improve documentation to StringView trim
* clarify what a valid view is
---
datafusion/functions/src/string/common.rs | 74 +++++++++++++++++++++++++------
1 file changed, 60 insertions(+), 14 deletions(-)
diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs
index 5c413b9c9a..72447bc68f 100644
--- a/datafusion/functions/src/string/common.rs
+++ b/datafusion/functions/src/string/common.rs
@@ -35,19 +35,27 @@ use datafusion_expr::ColumnarValue;
/// Append a new view to the views buffer with the given substr
///
-/// raw must be a valid view
-/// substr must be a valid substring of raw
-/// start must be less than or equal to the length of the string data
+/// # Safety
+///
+/// original_view must be a valid view (the format described on
+/// [`GenericByteViewArray`](arrow::array::GenericByteViewArray)).
+///
+/// # Arguments
+/// - views_buffer: The buffer to append the new view to
+/// - null_builder: The buffer to append the null value to
+/// - original_view: The original view value
+/// - substr: The substring to append. Must be a valid substring of the original view
+/// - start_offset: The start offset of the substring in the view
pub(crate) fn make_and_append_view(
views_buffer: &mut Vec<u128>,
null_builder: &mut NullBufferBuilder,
- raw_view: &u128,
+ original_view: &u128,
substr: &str,
start_offset: u32,
) {
let substr_len = substr.len();
let sub_view = if substr_len > 12 {
- let view = ByteView::from(*raw_view);
+ let view = ByteView::from(*original_view);
make_view(
substr.as_bytes(),
view.buffer_index,
@@ -82,13 +90,6 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
trim_type: TrimType,
use_string_view: bool,
) -> Result<ArrayRef> {
- // This is the function used to trim each string row, and it will return:
- // - trimmed str
- // e.g. ltrim("  abc") -> "abc"
- //
- // - start offset, needed in `string_view_trim`
- // e.g. "abc" actually is "  abc"[2..], and the start offset here should be 2
- //
let func = match trim_type {
TrimType::Left => |input, pattern: &str| {
let pattern = pattern.chars().collect::<Vec<char>>();
@@ -128,6 +129,28 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
}
}
+/// Applies the trim function to the given string view array(s)
+/// and returns a new string view array with the trimmed values.
+///
+/// # `trim_func`: The function to apply to each string view.
+///
+/// ## Arguments
+/// - The original string
+/// - the pattern to trim
+///
+/// ## Returns
+/// - trimmed str (must be a substring of the first argument)
+/// - start offset, needed in `string_view_trim`
+///
+/// ## Examples
+///
+/// For `ltrim`:
+/// - `fn("  abc", " ") -> ("abc", 2)`
+/// - `fn("abd", " ") -> ("abd", 0)`
+///
+/// For `btrim`:
+/// - `fn("  abc  ", " ") -> ("abc", 2)`
+/// - `fn("abd", " ") -> ("abd", 0)`
// removing 'a will cause compiler complaining lifetime of `func`
fn string_view_trim<'a>(
trim_func: fn(&'a str, &'a str) -> (&'a str, u32),
@@ -221,23 +244,46 @@ fn string_view_trim<'a>(
}
}
+/// Trims the given string and appends the trimmed string to the views buffer
+/// and the null buffer.
+///
+/// Calls `trim_func` on the string value in `original_view` for non-null
+/// values, and appends the updated view to the views buffer / null_builder.
+///
+/// # Arguments
+/// - `src_str_opt`: The original string value (represented by the view)
+/// - `trim_characters_opt`: The characters to trim from the string
+/// - `trim_func`: The function to apply to the string (see [`string_view_trim`] for details)
+/// - `views_buf`: The buffer to append the updated views to
+/// - `null_builder`: The buffer to append the null values to
+/// - `original_view`: The original view value (that contains src_str_opt)
fn trim_and_append_str<'a>(
src_str_opt: Option<&'a str>,
trim_characters_opt: Option<&'a str>,
trim_func: fn(&'a str, &'a str) -> (&'a str, u32),
views_buf: &mut Vec<u128>,
null_builder: &mut NullBufferBuilder,
- raw: &u128,
+ original_view: &u128,
) {
if let (Some(src_str), Some(characters)) = (src_str_opt,
trim_characters_opt) {
let (trim_str, start_offset) = trim_func(src_str, characters);
- make_and_append_view(views_buf, null_builder, raw, trim_str,
start_offset);
+ make_and_append_view(
+ views_buf,
+ null_builder,
+ original_view,
+ trim_str,
+ start_offset,
+ );
} else {
null_builder.append_null();
views_buf.push(0);
}
}
+/// Applies the trim function to the given string array(s)
+/// and returns a new string array with the trimmed values.
+///
+/// See [`string_view_trim`] for details on `func`
fn string_trim<'a, T: OffsetSizeTrait>(
func: fn(&'a str, &'a str) -> (&'a str, u32),
args: &'a [ArrayRef],
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]