This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 76b9e123e4 Limit visibility of internal impl functions in function
crates (#18877)
76b9e123e4 is described below
commit 76b9e123e44b92423b9a8e32cc407af58fb3bac0
Author: Jeffrey Vo <[email protected]>
AuthorDate: Mon Nov 24 08:21:24 2025 +1100
Limit visibility of internal impl functions in function crates (#18877)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
N/A
## Rationale for this change
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
Whilst reviewing some recent PRs (#18839 & #18768) I noticed we have
quite a few inner implementation functions that are public for some
reason, which gives the false impression that these are meant to be public
APIs (and thus that any changes to their signatures need to be restricted).
I went through and limited the functions to private where possible to try
to reduce our public API footprint.
## What changes are included in this PR?
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
Change inner functions in the functions & functions-nested crates from
public to private.
- Some are still left public, such as some of the regex ones,
because they are used directly in benches
## Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
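By the compiler itself: a successful build of the workspace confirms that
nothing else depends on the functions that are now private.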
## Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
-->
Yes, quite a few functions are now private, but I don't think they were
meant to be public in the first place.
<!--
If there are any breaking changes to public APIs, please add the `api
change` label.
-->
---
datafusion/functions-nested/src/cardinality.rs | 3 +-
datafusion/functions-nested/src/concat.rs | 9 ++--
datafusion/functions-nested/src/dimension.rs | 6 +--
datafusion/functions-nested/src/distance.rs | 2 +-
datafusion/functions-nested/src/empty.rs | 3 +-
datafusion/functions-nested/src/except.rs | 3 +-
datafusion/functions-nested/src/flatten.rs | 3 +-
datafusion/functions-nested/src/length.rs | 3 +-
datafusion/functions-nested/src/min_max.rs | 11 +----
datafusion/functions-nested/src/position.rs | 7 ++-
datafusion/functions-nested/src/remove.rs | 9 ++--
datafusion/functions-nested/src/repeat.rs | 3 +-
datafusion/functions-nested/src/replace.rs | 6 +--
datafusion/functions-nested/src/resize.rs | 3 +-
datafusion/functions-nested/src/set_ops.rs | 2 -
datafusion/functions-nested/src/sort.rs | 6 +--
datafusion/functions-nested/src/string.rs | 3 +-
datafusion/functions/src/core/overlay.rs | 4 +-
datafusion/functions/src/core/version.rs | 2 +-
datafusion/functions/src/math/iszero.rs | 2 +-
datafusion/functions/src/math/round.rs | 2 +-
datafusion/functions/src/math/signum.rs | 2 +-
datafusion/functions/src/regex/regexpcount.rs | 2 +-
datafusion/functions/src/regex/regexpinstr.rs | 33 +++++++-------
datafusion/functions/src/regex/regexpreplace.rs | 57 ++++++++-----------------
datafusion/functions/src/string/chr.rs | 2 +-
datafusion/functions/src/string/common.rs | 2 +-
datafusion/functions/src/string/concat.rs | 2 +-
datafusion/functions/src/string/split_part.rs | 44 +++++++++----------
datafusion/functions/src/string/to_hex.rs | 2 +-
datafusion/functions/src/unicode/find_in_set.rs | 5 +--
datafusion/functions/src/unicode/left.rs | 2 +-
datafusion/functions/src/unicode/lpad.rs | 2 +-
datafusion/functions/src/unicode/reverse.rs | 2 +-
datafusion/functions/src/unicode/right.rs | 2 +-
datafusion/functions/src/unicode/rpad.rs | 4 +-
datafusion/functions/src/unicode/substr.rs | 2 +-
datafusion/functions/src/unicode/substrindex.rs | 2 +-
38 files changed, 101 insertions(+), 158 deletions(-)
diff --git a/datafusion/functions-nested/src/cardinality.rs
b/datafusion/functions-nested/src/cardinality.rs
index 6db0011cd0..58a83feb66 100644
--- a/datafusion/functions-nested/src/cardinality.rs
+++ b/datafusion/functions-nested/src/cardinality.rs
@@ -117,8 +117,7 @@ impl ScalarUDFImpl for Cardinality {
}
}
-/// Cardinality SQL function
-pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("cardinality", args)?;
match array.data_type() {
Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))),
diff --git a/datafusion/functions-nested/src/concat.rs
b/datafusion/functions-nested/src/concat.rs
index 9a12db525f..a565006a25 100644
--- a/datafusion/functions-nested/src/concat.rs
+++ b/datafusion/functions-nested/src/concat.rs
@@ -352,8 +352,7 @@ impl ScalarUDFImpl for ArrayConcat {
}
}
-/// Array_concat/Array_cat SQL function
-pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_concat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.is_empty() {
return exec_err!("array_concat expects at least one argument");
}
@@ -453,8 +452,7 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef])
-> Result<ArrayRef> {
// Kernel functions
-/// Array_append SQL function
-pub(crate) fn array_append_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_append_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, values] = take_function_args("array_append", args)?;
match array.data_type() {
DataType::Null => make_array_inner(&[Arc::clone(values)]),
@@ -464,8 +462,7 @@ pub(crate) fn array_append_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-/// Array_prepend SQL function
-pub(crate) fn array_prepend_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_prepend_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [values, array] = take_function_args("array_prepend", args)?;
match array.data_type() {
DataType::Null => make_array_inner(&[Arc::clone(values)]),
diff --git a/datafusion/functions-nested/src/dimension.rs
b/datafusion/functions-nested/src/dimension.rs
index b0fc5bee54..d0fa294fe4 100644
--- a/datafusion/functions-nested/src/dimension.rs
+++ b/datafusion/functions-nested/src/dimension.rs
@@ -189,8 +189,7 @@ impl ScalarUDFImpl for ArrayNdims {
}
}
-/// Array_dims SQL function
-pub fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("array_dims", args)?;
let data: Vec<_> = match array.data_type() {
List(_) => as_list_array(&array)?
@@ -214,8 +213,7 @@ pub fn array_dims_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(result))
}
-/// Array_ndims SQL function
-pub fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("array_ndims", args)?;
fn general_list_ndims(array: &ArrayRef) -> Result<ArrayRef> {
diff --git a/datafusion/functions-nested/src/distance.rs
b/datafusion/functions-nested/src/distance.rs
index e2e38fbd0d..dc8eaa699f 100644
--- a/datafusion/functions-nested/src/distance.rs
+++ b/datafusion/functions-nested/src/distance.rs
@@ -141,7 +141,7 @@ impl ScalarUDFImpl for ArrayDistance {
}
}
-pub fn array_distance_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_distance_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array1, array2] = take_function_args("array_distance", args)?;
match (array1.data_type(), array2.data_type()) {
(List(_), List(_)) => general_array_distance::<i32>(args),
diff --git a/datafusion/functions-nested/src/empty.rs
b/datafusion/functions-nested/src/empty.rs
index 27a90ab044..3f90775752 100644
--- a/datafusion/functions-nested/src/empty.rs
+++ b/datafusion/functions-nested/src/empty.rs
@@ -110,8 +110,7 @@ impl ScalarUDFImpl for ArrayEmpty {
}
}
-/// Array_empty SQL function
-pub fn array_empty_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_empty_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("array_empty", args)?;
match array.data_type() {
List(_) => general_array_empty::<i32>(array),
diff --git a/datafusion/functions-nested/src/except.rs
b/datafusion/functions-nested/src/except.rs
index d6982ab5a2..8b6bcaa062 100644
--- a/datafusion/functions-nested/src/except.rs
+++ b/datafusion/functions-nested/src/except.rs
@@ -126,8 +126,7 @@ impl ScalarUDFImpl for ArrayExcept {
}
}
-/// Array_except SQL function
-pub fn array_except_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_except_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array1, array2] = take_function_args("array_except", args)?;
match (array1.data_type(), array2.data_type()) {
diff --git a/datafusion/functions-nested/src/flatten.rs
b/datafusion/functions-nested/src/flatten.rs
index e84a942fab..76c4714de1 100644
--- a/datafusion/functions-nested/src/flatten.rs
+++ b/datafusion/functions-nested/src/flatten.rs
@@ -130,8 +130,7 @@ impl ScalarUDFImpl for Flatten {
}
}
-/// Flatten SQL function
-pub fn flatten_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn flatten_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("flatten", args)?;
match array.data_type() {
diff --git a/datafusion/functions-nested/src/length.rs
b/datafusion/functions-nested/src/length.rs
index 060a978185..ceceee7bfa 100644
--- a/datafusion/functions-nested/src/length.rs
+++ b/datafusion/functions-nested/src/length.rs
@@ -150,8 +150,7 @@ macro_rules! array_length_impl {
}};
}
-/// Array_length SQL function
-pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 && args.len() != 2 {
return exec_err!("array_length expects one or two arguments");
}
diff --git a/datafusion/functions-nested/src/min_max.rs
b/datafusion/functions-nested/src/min_max.rs
index 117cfbeaa2..1f3623ca24 100644
--- a/datafusion/functions-nested/src/min_max.rs
+++ b/datafusion/functions-nested/src/min_max.rs
@@ -113,14 +113,7 @@ impl ScalarUDFImpl for ArrayMax {
}
}
-/// array_max SQL function
-///
-/// There is one argument for array_max as the array.
-/// `array_max(array)`
-///
-/// For example:
-/// > array_max(\[1, 3, 2]) -> 3
-pub fn array_max_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_max_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("array_max", args)?;
match array.data_type() {
List(_) => array_min_max_helper(as_list_array(array)?, max_batch),
@@ -202,7 +195,7 @@ impl ScalarUDFImpl for ArrayMin {
}
}
-pub fn array_min_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_min_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array] = take_function_args("array_min", args)?;
match array.data_type() {
List(_) => array_min_max_helper(as_list_array(array)?, min_batch),
diff --git a/datafusion/functions-nested/src/position.rs
b/datafusion/functions-nested/src/position.rs
index b390bf3c42..14f2ed3313 100644
--- a/datafusion/functions-nested/src/position.rs
+++ b/datafusion/functions-nested/src/position.rs
@@ -141,8 +141,7 @@ impl ScalarUDFImpl for ArrayPosition {
}
}
-/// Array_position SQL function
-pub fn array_position_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_position_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
return exec_err!("array_position expects two or three arguments");
}
@@ -152,6 +151,7 @@ pub fn array_position_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
array_type => exec_err!("array_position does not support type
'{array_type}'."),
}
}
+
fn general_position_dispatch<O: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
let list_array = as_generic_list_array::<O>(&args[0])?;
let element_array = &args[1];
@@ -292,8 +292,7 @@ impl ScalarUDFImpl for ArrayPositions {
}
}
-/// Array_positions SQL function
-pub fn array_positions_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_positions_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, element] = take_function_args("array_positions", args)?;
match &array.data_type() {
diff --git a/datafusion/functions-nested/src/remove.rs
b/datafusion/functions-nested/src/remove.rs
index e1ebc9cda0..46111b0c2d 100644
--- a/datafusion/functions-nested/src/remove.rs
+++ b/datafusion/functions-nested/src/remove.rs
@@ -284,24 +284,21 @@ impl ScalarUDFImpl for ArrayRemoveAll {
}
}
-/// Array_remove SQL function
-pub fn array_remove_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_remove_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, element] = take_function_args("array_remove", args)?;
let arr_n = vec![1; array.len()];
array_remove_internal(array, element, &arr_n)
}
-/// Array_remove_n SQL function
-pub fn array_remove_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_remove_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, element, max] = take_function_args("array_remove_n", args)?;
let arr_n = as_int64_array(max)?.values().to_vec();
array_remove_internal(array, element, &arr_n)
}
-/// Array_remove_all SQL function
-pub fn array_remove_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_remove_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, element] = take_function_args("array_remove_all", args)?;
let arr_n = vec![i64::MAX; array.len()];
diff --git a/datafusion/functions-nested/src/repeat.rs
b/datafusion/functions-nested/src/repeat.rs
index ed66b9e396..d978081e49 100644
--- a/datafusion/functions-nested/src/repeat.rs
+++ b/datafusion/functions-nested/src/repeat.rs
@@ -148,8 +148,7 @@ impl ScalarUDFImpl for ArrayRepeat {
}
}
-/// Array_repeat SQL function
-pub fn array_repeat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_repeat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let element = &args[0];
let count_array = &args[1];
diff --git a/datafusion/functions-nested/src/replace.rs
b/datafusion/functions-nested/src/replace.rs
index 079c28175d..53182b5898 100644
--- a/datafusion/functions-nested/src/replace.rs
+++ b/datafusion/functions-nested/src/replace.rs
@@ -418,7 +418,7 @@ fn general_replace<O: OffsetSizeTrait>(
)?))
}
-pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_replace_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, from, to] = take_function_args("array_replace", args)?;
// replace at most one occurrence for each element
@@ -437,7 +437,7 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_replace_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, from, to, max] = take_function_args("array_replace_n", args)?;
// replace the specified number of occurrences
@@ -458,7 +458,7 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-pub(crate) fn array_replace_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_replace_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array, from, to] = take_function_args("array_replace_all", args)?;
// replace all occurrences (up to "i64::MAX")
diff --git a/datafusion/functions-nested/src/resize.rs
b/datafusion/functions-nested/src/resize.rs
index 09f67a75fd..c76f7970d2 100644
--- a/datafusion/functions-nested/src/resize.rs
+++ b/datafusion/functions-nested/src/resize.rs
@@ -152,8 +152,7 @@ impl ScalarUDFImpl for ArrayResize {
}
}
-/// array_resize SQL function
-pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_resize_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() < 2 || arg.len() > 3 {
return exec_err!("array_resize needs two or three arguments");
}
diff --git a/datafusion/functions-nested/src/set_ops.rs
b/datafusion/functions-nested/src/set_ops.rs
index 71a42531f9..e3531d1cf8 100644
--- a/datafusion/functions-nested/src/set_ops.rs
+++ b/datafusion/functions-nested/src/set_ops.rs
@@ -501,13 +501,11 @@ fn general_set_op(
}
}
-/// Array_union SQL function
fn array_union_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array1, array2] = take_function_args("array_union", args)?;
general_set_op(array1, array2, SetOp::Union)
}
-/// array_intersect SQL function
fn array_intersect_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let [array1, array2] = take_function_args("array_intersect", args)?;
general_set_op(array1, array2, SetOp::Intersect)
diff --git a/datafusion/functions-nested/src/sort.rs
b/datafusion/functions-nested/src/sort.rs
index 4a7aa31c75..8cfc8a297b 100644
--- a/datafusion/functions-nested/src/sort.rs
+++ b/datafusion/functions-nested/src/sort.rs
@@ -164,8 +164,7 @@ impl ScalarUDFImpl for ArraySort {
}
}
-/// Array_sort SQL function
-pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.is_empty() || args.len() > 3 {
return exec_err!("array_sort expects one to three arguments");
}
@@ -218,8 +217,7 @@ pub fn array_sort_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-/// Array_sort SQL function
-pub fn array_sort_generic<OffsetSize: OffsetSizeTrait>(
+fn array_sort_generic<OffsetSize: OffsetSizeTrait>(
list_array: &GenericListArray<OffsetSize>,
field: &FieldRef,
sort_options: Option<SortOptions>,
diff --git a/datafusion/functions-nested/src/string.rs
b/datafusion/functions-nested/src/string.rs
index b87ac0f8c4..e19025cf67 100644
--- a/datafusion/functions-nested/src/string.rs
+++ b/datafusion/functions-nested/src/string.rs
@@ -329,8 +329,7 @@ impl ScalarUDFImpl for StringToArray {
}
}
-/// Array_to_string SQL function
-pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
return exec_err!("array_to_string expects two or three arguments");
}
diff --git a/datafusion/functions/src/core/overlay.rs
b/datafusion/functions/src/core/overlay.rs
index 165bc571af..0b3bb2ce74 100644
--- a/datafusion/functions/src/core/overlay.rs
+++ b/datafusion/functions/src/core/overlay.rs
@@ -201,7 +201,7 @@ fn overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-pub fn string_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
+fn string_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
match args.len() {
3 => {
let string_array = as_generic_string_array::<T>(&args[0])?;
@@ -227,7 +227,7 @@ pub fn string_overlay<T: OffsetSizeTrait>(args:
&[ArrayRef]) -> Result<ArrayRef>
}
}
-pub fn string_view_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
+fn string_view_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
match args.len() {
3 => {
let string_array = as_string_view_array(&args[0])?;
diff --git a/datafusion/functions/src/core/version.rs
b/datafusion/functions/src/core/version.rs
index ef3c5aafa4..006da4b132 100644
--- a/datafusion/functions/src/core/version.rs
+++ b/datafusion/functions/src/core/version.rs
@@ -53,7 +53,7 @@ impl Default for VersionFunc {
impl VersionFunc {
pub fn new() -> Self {
Self {
- signature: Signature::exact(vec![], Volatility::Immutable),
+ signature: Signature::nullary(Volatility::Immutable),
}
}
}
diff --git a/datafusion/functions/src/math/iszero.rs
b/datafusion/functions/src/math/iszero.rs
index 68cd3aca28..f053256a48 100644
--- a/datafusion/functions/src/math/iszero.rs
+++ b/datafusion/functions/src/math/iszero.rs
@@ -96,7 +96,7 @@ impl ScalarUDFImpl for IsZeroFunc {
}
/// Iszero SQL function
-pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
Float64 => Ok(Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float64Type>(),
diff --git a/datafusion/functions/src/math/round.rs
b/datafusion/functions/src/math/round.rs
index 837f0be432..5f9b1eb6ad 100644
--- a/datafusion/functions/src/math/round.rs
+++ b/datafusion/functions/src/math/round.rs
@@ -124,7 +124,7 @@ impl ScalarUDFImpl for RoundFunc {
}
/// Round SQL function
-pub fn round(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn round(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 && args.len() != 2 {
return exec_err!(
"round function requires one or two arguments, got {}",
diff --git a/datafusion/functions/src/math/signum.rs
b/datafusion/functions/src/math/signum.rs
index bbe6178f39..2e616fe0fe 100644
--- a/datafusion/functions/src/math/signum.rs
+++ b/datafusion/functions/src/math/signum.rs
@@ -107,7 +107,7 @@ impl ScalarUDFImpl for SignumFunc {
}
/// signum SQL function
-pub fn signum(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn signum(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
Float64 => Ok(Arc::new(
args[0]
diff --git a/datafusion/functions/src/regex/regexpcount.rs
b/datafusion/functions/src/regex/regexpcount.rs
index e61b3f764d..ae08ca3e92 100644
--- a/datafusion/functions/src/regex/regexpcount.rs
+++ b/datafusion/functions/src/regex/regexpcount.rs
@@ -183,7 +183,7 @@ pub fn regexp_count_func(args: &[ArrayRef]) ->
Result<ArrayRef> {
///
/// # Errors
/// Returns an error if the input arrays have mismatched lengths or if the
regular expression fails to compile.
-pub fn regexp_count(
+fn regexp_count(
values: &dyn Array,
regex_array: &dyn Datum,
start_array: Option<&dyn Datum>,
diff --git a/datafusion/functions/src/regex/regexpinstr.rs
b/datafusion/functions/src/regex/regexpinstr.rs
index 10fddda1a3..0115648665 100644
--- a/datafusion/functions/src/regex/regexpinstr.rs
+++ b/datafusion/functions/src/regex/regexpinstr.rs
@@ -205,7 +205,7 @@ pub fn regexp_instr_func(args: &[ArrayRef]) ->
Result<ArrayRef> {
///
/// # Errors
/// Returns an error if the input arrays have mismatched lengths or if the
regular expression fails to compile.
-pub fn regexp_instr(
+fn regexp_instr(
values: &dyn Array,
regex_array: &dyn Datum,
start_array: Option<&dyn Datum>,
@@ -233,48 +233,48 @@ pub fn regexp_instr(
match (values.data_type(), regex_array.data_type(), flags_array) {
(Utf8, Utf8, None) => regexp_instr_inner(
- values.as_string::<i32>(),
- regex_array.as_string::<i32>(),
+ &values.as_string::<i32>(),
+ &regex_array.as_string::<i32>(),
start_array.map(|start| start.as_primitive::<Int64Type>()),
nth_array.map(|nth| nth.as_primitive::<Int64Type>()),
None,
subexpr_array.map(|subexpr| subexpr.as_primitive::<Int64Type>()),
),
(Utf8, Utf8, Some(flags_array)) if *flags_array.data_type() == Utf8 =>
regexp_instr_inner(
- values.as_string::<i32>(),
- regex_array.as_string::<i32>(),
+ &values.as_string::<i32>(),
+ &regex_array.as_string::<i32>(),
start_array.map(|start| start.as_primitive::<Int64Type>()),
nth_array.map(|nth| nth.as_primitive::<Int64Type>()),
Some(flags_array.as_string::<i32>()),
subexpr_array.map(|subexpr| subexpr.as_primitive::<Int64Type>()),
),
(LargeUtf8, LargeUtf8, None) => regexp_instr_inner(
- values.as_string::<i64>(),
- regex_array.as_string::<i64>(),
+ &values.as_string::<i64>(),
+ &regex_array.as_string::<i64>(),
start_array.map(|start| start.as_primitive::<Int64Type>()),
nth_array.map(|nth| nth.as_primitive::<Int64Type>()),
None,
subexpr_array.map(|subexpr| subexpr.as_primitive::<Int64Type>()),
),
(LargeUtf8, LargeUtf8, Some(flags_array)) if *flags_array.data_type()
== LargeUtf8 => regexp_instr_inner(
- values.as_string::<i64>(),
- regex_array.as_string::<i64>(),
+ &values.as_string::<i64>(),
+ &regex_array.as_string::<i64>(),
start_array.map(|start| start.as_primitive::<Int64Type>()),
nth_array.map(|nth| nth.as_primitive::<Int64Type>()),
Some(flags_array.as_string::<i64>()),
subexpr_array.map(|subexpr| subexpr.as_primitive::<Int64Type>()),
),
(Utf8View, Utf8View, None) => regexp_instr_inner(
- values.as_string_view(),
- regex_array.as_string_view(),
+ &values.as_string_view(),
+ &regex_array.as_string_view(),
start_array.map(|start| start.as_primitive::<Int64Type>()),
nth_array.map(|nth| nth.as_primitive::<Int64Type>()),
None,
subexpr_array.map(|subexpr| subexpr.as_primitive::<Int64Type>()),
),
(Utf8View, Utf8View, Some(flags_array)) if *flags_array.data_type() ==
Utf8View => regexp_instr_inner(
- values.as_string_view(),
- regex_array.as_string_view(),
+ &values.as_string_view(),
+ &regex_array.as_string_view(),
start_array.map(|start| start.as_primitive::<Int64Type>()),
nth_array.map(|nth| nth.as_primitive::<Int64Type>()),
Some(flags_array.as_string_view()),
@@ -287,10 +287,9 @@ pub fn regexp_instr(
}
#[allow(clippy::too_many_arguments)]
-#[expect(clippy::needless_pass_by_value)]
-pub fn regexp_instr_inner<'a, S>(
- values: S,
- regex_array: S,
+fn regexp_instr_inner<'a, S>(
+ values: &S,
+ regex_array: &S,
start_array: Option<&Int64Array>,
nth_array: Option<&Int64Array>,
flags_array: Option<S>,
diff --git a/datafusion/functions/src/regex/regexpreplace.rs
b/datafusion/functions/src/regex/regexpreplace.rs
index f986642713..29da195c7a 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -76,7 +76,7 @@ Additional examples can be found
[here](https://github.com/apache/datafusion/blo
argument(
name = "flags",
description = r#"Optional regular expression flags that control the
behavior of the regular expression. The following flags are supported:
-- **g**: (global) Search globally and don't return after the first match
+- **g**: (global) Search globally and don't return after the first match
- **i**: case-insensitive: letters match both upper and lower case
- **m**: multi-line mode: ^ and $ match begin/end of line
- **s**: allow . to match \n
@@ -382,49 +382,32 @@ where
}
}
-#[expect(clippy::needless_pass_by_value)]
-fn _regexp_replace_early_abort<T: ArrayAccessor>(
- input_array: T,
- sz: usize,
-) -> Result<ArrayRef> {
- // Mimicking the existing behavior of regexp_replace, if any of the scalar
arguments
- // are actually null, then the result will be an array of the same size as
the first argument with all nulls.
- //
- // Also acts like an early abort mechanism when the input array is empty.
- Ok(new_null_array(input_array.data_type(), sz))
-}
-
/// Get the first argument from the given string array.
///
/// Note: If the array is empty or the first argument is null,
-/// then calls the given early abort function.
+/// then aborts early.
macro_rules! fetch_string_arg {
- ($ARG:expr, $NAME:expr, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{
+ ($ARG:expr, $NAME:expr, $ARRAY_SIZE:expr) => {{
let string_array_type = ($ARG).data_type();
match string_array_type {
+ dt if $ARG.len() == 0 || $ARG.is_null(0) => {
+ // Mimicking the existing behavior of regexp_replace, if any
of the scalar arguments
+ // are actually null, then the result will be an array of the
same size as the first argument with all nulls.
+ //
+ // Also acts like an early abort mechanism when the input
array is empty.
+ return Ok(new_null_array(dt, $ARRAY_SIZE));
+ }
DataType::Utf8 => {
let array = as_string_array($ARG)?;
- if array.len() == 0 || array.is_null(0) {
- return $EARLY_ABORT(array, $ARRAY_SIZE);
- } else {
- array.value(0)
- }
+ array.value(0)
}
DataType::LargeUtf8 => {
let array = as_large_string_array($ARG)?;
- if array.len() == 0 || array.is_null(0) {
- return $EARLY_ABORT(array, $ARRAY_SIZE);
- } else {
- array.value(0)
- }
+ array.value(0)
}
DataType::Utf8View => {
let array = as_string_view_array($ARG)?;
- if array.len() == 0 || array.is_null(0) {
- return $EARLY_ABORT(array, $ARRAY_SIZE);
- } else {
- array.value(0)
- }
+ array.value(0)
}
_ => unreachable!(
"Invalid data type for regexp_replace: {}",
@@ -443,17 +426,11 @@ fn _regexp_replace_static_pattern_replace<T:
OffsetSizeTrait>(
args: &[ArrayRef],
) -> Result<ArrayRef> {
let array_size = args[0].len();
- let pattern =
- fetch_string_arg!(&args[1], "pattern", _regexp_replace_early_abort,
array_size);
- let replacement = fetch_string_arg!(
- &args[2],
- "replacement",
- _regexp_replace_early_abort,
- array_size
- );
+ let pattern = fetch_string_arg!(&args[1], "pattern", array_size);
+ let replacement = fetch_string_arg!(&args[2], "replacement", array_size);
let flags = match args.len() {
3 => None,
- 4 => Some(fetch_string_arg!(&args[3], "flags",
_regexp_replace_early_abort, array_size)),
+ 4 => Some(fetch_string_arg!(&args[3], "flags", array_size)),
other => {
return exec_err!(
"regexp_replace was called with {other} arguments. It requires
at least 3 and at most 4."
@@ -538,7 +515,7 @@ fn _regexp_replace_static_pattern_replace<T:
OffsetSizeTrait>(
/// Determine which implementation of the regexp_replace to use based
/// on the given set of arguments.
-pub fn specialize_regexp_replace<T: OffsetSizeTrait>(
+fn specialize_regexp_replace<T: OffsetSizeTrait>(
args: &[ColumnarValue],
) -> Result<ArrayRef> {
// This will serve as a dispatch table where we can
diff --git a/datafusion/functions/src/string/chr.rs
b/datafusion/functions/src/string/chr.rs
index 4d2beafbae..8706c43214 100644
--- a/datafusion/functions/src/string/chr.rs
+++ b/datafusion/functions/src/string/chr.rs
@@ -33,7 +33,7 @@ use datafusion_macros::user_doc;
/// Returns the character with the given code.
/// chr(65) = 'A'
-pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
let integer_array = as_int64_array(&args[0])?;
let mut builder = GenericStringBuilder::<i32>::with_capacity(
diff --git a/datafusion/functions/src/string/common.rs
b/datafusion/functions/src/string/common.rs
index 6bce289edb..34f1b6232d 100644
--- a/datafusion/functions/src/string/common.rs
+++ b/datafusion/functions/src/string/common.rs
@@ -32,6 +32,7 @@ use datafusion_common::Result;
use datafusion_common::{exec_err, ScalarValue};
use datafusion_expr::ColumnarValue;
+#[derive(Copy, Clone)]
pub(crate) enum TrimType {
Left,
Right,
@@ -48,7 +49,6 @@ impl Display for TrimType {
}
}
-#[expect(clippy::needless_pass_by_value)]
pub(crate) fn general_trim<T: OffsetSizeTrait>(
args: &[ArrayRef],
trim_type: TrimType,
diff --git a/datafusion/functions/src/string/concat.rs
b/datafusion/functions/src/string/concat.rs
index a93e70e714..3b53660463 100644
--- a/datafusion/functions/src/string/concat.rs
+++ b/datafusion/functions/src/string/concat.rs
@@ -287,7 +287,7 @@ impl ScalarUDFImpl for ConcatFunc {
}
}
-pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
+pub(crate) fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
let mut new_args = Vec::with_capacity(args.len());
let mut contiguous_scalar = "".to_string();
diff --git a/datafusion/functions/src/string/split_part.rs
b/datafusion/functions/src/string/split_part.rs
index b32eba990d..c8b293f298 100644
--- a/datafusion/functions/src/string/split_part.rs
+++ b/datafusion/functions/src/string/split_part.rs
@@ -123,64 +123,64 @@ impl ScalarUDFImpl for SplitPartFunc {
let result = match (args[0].data_type(), args[1].data_type()) {
(DataType::Utf8View, DataType::Utf8View) => {
split_part_impl::<&StringViewArray, &StringViewArray, i32>(
- args[0].as_string_view(),
- args[1].as_string_view(),
+ &args[0].as_string_view(),
+ &args[1].as_string_view(),
n_array,
)
}
(DataType::Utf8View, DataType::Utf8) => {
split_part_impl::<&StringViewArray, &GenericStringArray<i32>,
i32>(
- args[0].as_string_view(),
- args[1].as_string::<i32>(),
+ &args[0].as_string_view(),
+ &args[1].as_string::<i32>(),
n_array,
)
}
(DataType::Utf8View, DataType::LargeUtf8) => {
split_part_impl::<&StringViewArray, &GenericStringArray<i64>,
i32>(
- args[0].as_string_view(),
- args[1].as_string::<i64>(),
+ &args[0].as_string_view(),
+ &args[1].as_string::<i64>(),
n_array,
)
}
(DataType::Utf8, DataType::Utf8View) => {
split_part_impl::<&GenericStringArray<i32>, &StringViewArray,
i32>(
- args[0].as_string::<i32>(),
- args[1].as_string_view(),
+ &args[0].as_string::<i32>(),
+ &args[1].as_string_view(),
n_array,
)
}
(DataType::LargeUtf8, DataType::Utf8View) => {
split_part_impl::<&GenericStringArray<i64>, &StringViewArray,
i64>(
- args[0].as_string::<i64>(),
- args[1].as_string_view(),
+ &args[0].as_string::<i64>(),
+ &args[1].as_string_view(),
n_array,
)
}
(DataType::Utf8, DataType::Utf8) => {
split_part_impl::<&GenericStringArray<i32>,
&GenericStringArray<i32>, i32>(
- args[0].as_string::<i32>(),
- args[1].as_string::<i32>(),
+ &args[0].as_string::<i32>(),
+ &args[1].as_string::<i32>(),
n_array,
)
}
(DataType::LargeUtf8, DataType::LargeUtf8) => {
split_part_impl::<&GenericStringArray<i64>,
&GenericStringArray<i64>, i64>(
- args[0].as_string::<i64>(),
- args[1].as_string::<i64>(),
+ &args[0].as_string::<i64>(),
+ &args[1].as_string::<i64>(),
n_array,
)
}
(DataType::Utf8, DataType::LargeUtf8) => {
split_part_impl::<&GenericStringArray<i32>,
&GenericStringArray<i64>, i32>(
- args[0].as_string::<i32>(),
- args[1].as_string::<i64>(),
+ &args[0].as_string::<i32>(),
+ &args[1].as_string::<i64>(),
n_array,
)
}
(DataType::LargeUtf8, DataType::Utf8) => {
split_part_impl::<&GenericStringArray<i64>,
&GenericStringArray<i32>, i64>(
- args[0].as_string::<i64>(),
- args[1].as_string::<i32>(),
+ &args[0].as_string::<i64>(),
+ &args[1].as_string::<i32>(),
n_array,
)
}
@@ -200,11 +200,9 @@ impl ScalarUDFImpl for SplitPartFunc {
}
}
-/// impl
-#[expect(clippy::needless_pass_by_value)]
-pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>(
- string_array: StringArrType,
- delimiter_array: DelimiterArrType,
+fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>(
+ string_array: &StringArrType,
+ delimiter_array: &DelimiterArrType,
n_array: &Int64Array,
) -> Result<ArrayRef>
where
diff --git a/datafusion/functions/src/string/to_hex.rs
b/datafusion/functions/src/string/to_hex.rs
index 26be0066c2..4000f3bb3b 100644
--- a/datafusion/functions/src/string/to_hex.rs
+++ b/datafusion/functions/src/string/to_hex.rs
@@ -39,7 +39,7 @@ use datafusion_macros::user_doc;
/// Converts the number to its equivalent hexadecimal representation.
/// to_hex(2147483647) = '7fffffff'
-pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
+fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
where
T::Native: std::fmt::LowerHex,
{
diff --git a/datafusion/functions/src/unicode/find_in_set.rs
b/datafusion/functions/src/unicode/find_in_set.rs
index 649bd631d1..e83e3d99a3 100644
--- a/datafusion/functions/src/unicode/find_in_set.rs
+++ b/datafusion/functions/src/unicode/find_in_set.rs
@@ -263,10 +263,7 @@ fn find_in_set(str: &ArrayRef, str_list: &ArrayRef) ->
Result<ArrayRef> {
}
}
-pub fn find_in_set_general<'a, T, V>(
- string_array: V,
- str_list_array: V,
-) -> Result<ArrayRef>
+fn find_in_set_general<'a, T, V>(string_array: V, str_list_array: V) ->
Result<ArrayRef>
where
T: ArrowPrimitiveType,
T::Native: OffsetSizeTrait,
diff --git a/datafusion/functions/src/unicode/left.rs
b/datafusion/functions/src/unicode/left.rs
index fceb2a131a..ec7ec456ab 100644
--- a/datafusion/functions/src/unicode/left.rs
+++ b/datafusion/functions/src/unicode/left.rs
@@ -122,7 +122,7 @@ impl ScalarUDFImpl for LeftFunc {
/// Returns first n characters in the string, or when n is negative, returns
all but last |n| characters.
/// left('abcde', 2) = 'ab'
/// The implementation uses UTF-8 code points as characters
-pub fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let n_array = as_int64_array(&args[1])?;
if args[0].data_type() == &DataType::Utf8View {
diff --git a/datafusion/functions/src/unicode/lpad.rs
b/datafusion/functions/src/unicode/lpad.rs
index b69af247b9..6940459b17 100644
--- a/datafusion/functions/src/unicode/lpad.rs
+++ b/datafusion/functions/src/unicode/lpad.rs
@@ -129,7 +129,7 @@ impl ScalarUDFImpl for LPadFunc {
/// Extends the string to length 'length' by prepending the characters fill (a
space by default).
/// If the string is already longer than length then it is truncated (on the
right).
/// lpad('hi', 5, 'xy') = 'xyxhi'
-pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() <= 1 || args.len() > 3 {
return exec_err!(
"lpad was called with {} arguments. It requires at least 2 and at
most 3.",
diff --git a/datafusion/functions/src/unicode/reverse.rs
b/datafusion/functions/src/unicode/reverse.rs
index b5f870d54b..56f6048d6b 100644
--- a/datafusion/functions/src/unicode/reverse.rs
+++ b/datafusion/functions/src/unicode/reverse.rs
@@ -106,7 +106,7 @@ impl ScalarUDFImpl for ReverseFunc {
/// Reverses the order of the characters in the string `reverse('abcde') =
'edcba'`.
/// The implementation uses UTF-8 code points as characters
-pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
if args[0].data_type() == &Utf8View {
reverse_impl::<T, _>(&args[0].as_string_view())
} else {
diff --git a/datafusion/functions/src/unicode/right.rs
b/datafusion/functions/src/unicode/right.rs
index c492f606e9..670586e11b 100644
--- a/datafusion/functions/src/unicode/right.rs
+++ b/datafusion/functions/src/unicode/right.rs
@@ -122,7 +122,7 @@ impl ScalarUDFImpl for RightFunc {
/// Returns last n characters in the string, or when n is negative, returns
all but first |n| characters.
/// right('abcde', 2) = 'de'
/// The implementation uses UTF-8 code points as characters
-pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let n_array = as_int64_array(&args[1])?;
if args[0].data_type() == &DataType::Utf8View {
// string_view_right(args)
diff --git a/datafusion/functions/src/unicode/rpad.rs
b/datafusion/functions/src/unicode/rpad.rs
index d644df9874..a7e951051d 100644
--- a/datafusion/functions/src/unicode/rpad.rs
+++ b/datafusion/functions/src/unicode/rpad.rs
@@ -145,7 +145,7 @@ impl ScalarUDFImpl for RPadFunc {
}
}
-pub fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen: OffsetSizeTrait>(
+fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen: OffsetSizeTrait>(
args: &[ArrayRef],
) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
@@ -205,7 +205,7 @@ pub fn rpad<StringArrayLen: OffsetSizeTrait, FillArrayLen:
OffsetSizeTrait>(
/// Extends the string to length 'length' by appending the characters fill (a
space by default). If the string is already longer than length then it is
truncated.
/// rpad('hi', 5, 'xy') = 'hixyx'
-pub fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>(
+fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>(
string_array: &StringArrType,
length_array: &Int64Array,
fill_array: Option<FillArrType>,
diff --git a/datafusion/functions/src/unicode/substr.rs
b/datafusion/functions/src/unicode/substr.rs
index 6eee49d490..27b194ca2b 100644
--- a/datafusion/functions/src/unicode/substr.rs
+++ b/datafusion/functions/src/unicode/substr.rs
@@ -141,7 +141,7 @@ impl ScalarUDFImpl for SubstrFunc {
/// substr('alphabet', 3) = 'phabet'
/// substr('alphabet', 3, 2) = 'ph'
/// The implementation uses UTF-8 code points as characters
-pub fn substr(args: &[ArrayRef]) -> Result<ArrayRef> {
+fn substr(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Utf8 => {
let string_array = args[0].as_string::<i32>();
diff --git a/datafusion/functions/src/unicode/substrindex.rs
b/datafusion/functions/src/unicode/substrindex.rs
index a7ee7388f9..bf59787206 100644
--- a/datafusion/functions/src/unicode/substrindex.rs
+++ b/datafusion/functions/src/unicode/substrindex.rs
@@ -169,7 +169,7 @@ fn substr_index(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}
-pub fn substr_index_general<
+fn substr_index_general<
'a,
T: ArrowPrimitiveType,
V: ArrayAccessor<Item = &'a str>,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]