This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 45ed5aa5d0 fix: `core_expressions` feature flag broken, move `overlay`
into `core` functions (#15217)
45ed5aa5d0 is described below
commit 45ed5aa5d008f7334bf64efc708038cd0c22059e
Author: Shruti Sharma <[email protected]>
AuthorDate: Tue Mar 25 00:33:38 2025 +0530
fix: `core_expressions` feature flag broken, move `overlay` into `core`
functions (#15217)
* fix: remove core_expressions, move overlay to core
* fix license header
* fix header
* update doc
* Remove CI reference
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
.github/workflows/rust.yml | 3 -
datafusion/functions/Cargo.toml | 4 -
datafusion/functions/src/core/mod.rs | 7 +
.../functions/src/{string => core}/overlay.rs | 0
datafusion/functions/src/core/planner.rs | 2 +-
datafusion/functions/src/lib.rs | 5 +-
datafusion/functions/src/string/mod.rs | 5 -
datafusion/functions/src/string/overlay.rs | 263 +--------------------
docs/source/user-guide/sql/scalar_functions.md | 27 +++
9 files changed, 39 insertions(+), 277 deletions(-)
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 599e6e3cc3..1e6cd97ace 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -235,9 +235,6 @@ jobs:
#
- name: Check datafusion-functions (no-default-features)
run: cargo check --profile ci --no-default-features -p
datafusion-functions
- # Fails due https://github.com/apache/datafusion/issues/15207
- #- name: Check datafusion-functions (core_expressions)
- # run: cargo check --profile ci --no-default-features -p
datafusion-functions --features=core_expressions
- name: Check datafusion-functions (crypto_expressions)
run: cargo check --profile ci --no-default-features -p
datafusion-functions --features=crypto_expressions
- name: Check datafusion-functions (datetime_expressions)
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 07e4973675..31ff55121b 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -35,14 +35,11 @@ all-features = true
workspace = true
[features]
-# enable core functions
-core_expressions = []
crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
# enable datetime functions
datetime_expressions = []
# Enable encoding by default so the doctests work. In general don't
automatically enable all packages.
default = [
- "core_expressions",
"datetime_expressions",
"encoding_expressions",
"math_expressions",
@@ -146,7 +143,6 @@ required-features = ["math_expressions"]
[[bench]]
harness = false
name = "nullif"
-required-features = ["core_expressions"]
[[bench]]
harness = false
diff --git a/datafusion/functions/src/core/mod.rs
b/datafusion/functions/src/core/mod.rs
index 425ce78dec..c6329b1ee0 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -32,6 +32,7 @@ pub mod named_struct;
pub mod nullif;
pub mod nvl;
pub mod nvl2;
+pub mod overlay;
pub mod planner;
pub mod r#struct;
pub mod union_extract;
@@ -42,6 +43,7 @@ make_udf_function!(arrow_cast::ArrowCastFunc, arrow_cast);
make_udf_function!(nullif::NullIfFunc, nullif);
make_udf_function!(nvl::NVLFunc, nvl);
make_udf_function!(nvl2::NVL2Func, nvl2);
+make_udf_function!(overlay::OverlayFunc, overlay);
make_udf_function!(arrowtypeof::ArrowTypeOfFunc, arrow_typeof);
make_udf_function!(r#struct::StructFunc, r#struct);
make_udf_function!(named_struct::NamedStructFunc, named_struct);
@@ -71,6 +73,10 @@ pub mod expr_fn {
nvl2,
"Returns value2 if value1 is not NULL; otherwise, it returns value3.",
arg1 arg2 arg3
+ ),(
+ overlay,
+ "replace the substring of string that starts at the start'th character
and extends for count characters with new substring",
+ args,
),(
arrow_typeof,
"Returns the Arrow type of the input expression.",
@@ -115,6 +121,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
arrow_cast(),
nvl(),
nvl2(),
+ overlay(),
arrow_typeof(),
named_struct(),
// Note: most users invoke `get_field` indirectly via field access
diff --git a/datafusion/functions/src/string/overlay.rs
b/datafusion/functions/src/core/overlay.rs
similarity index 100%
copy from datafusion/functions/src/string/overlay.rs
copy to datafusion/functions/src/core/overlay.rs
diff --git a/datafusion/functions/src/core/planner.rs
b/datafusion/functions/src/core/planner.rs
index 717a74797c..227e401156 100644
--- a/datafusion/functions/src/core/planner.rs
+++ b/datafusion/functions/src/core/planner.rs
@@ -60,7 +60,7 @@ impl ExprPlanner for CoreFunctionPlanner {
fn plan_overlay(&self, args: Vec<Expr>) ->
Result<PlannerResult<Vec<Expr>>> {
Ok(PlannerResult::Planned(Expr::ScalarFunction(
- ScalarFunction::new_udf(crate::string::overlay(), args),
+ ScalarFunction::new_udf(crate::core::overlay(), args),
)))
}
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 7753b9a6dc..b65c4c5432 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -100,10 +100,8 @@ pub mod string;
make_stub_package!(string, "string_expressions");
/// Core datafusion expressions
-/// Enabled via feature flag `core_expressions`
-#[cfg(feature = "core_expressions")]
+/// These are always available and not controlled by a feature flag
pub mod core;
-make_stub_package!(core, "core_expressions");
/// Date and time expressions.
/// Contains functions such as to_timestamp
@@ -148,7 +146,6 @@ pub mod utils;
/// Fluent-style API for creating `Expr`s
pub mod expr_fn {
- #[cfg(feature = "core_expressions")]
pub use super::core::expr_fn::*;
#[cfg(feature = "crypto_expressions")]
pub use super::crypto::expr_fn::*;
diff --git a/datafusion/functions/src/string/mod.rs
b/datafusion/functions/src/string/mod.rs
index 442c055ac3..4c59e26444 100644
--- a/datafusion/functions/src/string/mod.rs
+++ b/datafusion/functions/src/string/mod.rs
@@ -55,7 +55,6 @@ make_udf_function!(levenshtein::LevenshteinFunc, levenshtein);
make_udf_function!(ltrim::LtrimFunc, ltrim);
make_udf_function!(lower::LowerFunc, lower);
make_udf_function!(octet_length::OctetLengthFunc, octet_length);
-make_udf_function!(overlay::OverlayFunc, overlay);
make_udf_function!(repeat::RepeatFunc, repeat);
make_udf_function!(replace::ReplaceFunc, replace);
make_udf_function!(rtrim::RtrimFunc, rtrim);
@@ -108,10 +107,6 @@ pub mod expr_fn {
octet_length,
"returns the number of bytes of a string",
args
- ),(
- overlay,
- "replace the substring of string that starts at the start'th character
and extends for count characters with new substring",
- args,
),(
repeat,
"Repeats the `string` to `n` times",
diff --git a/datafusion/functions/src/string/overlay.rs
b/datafusion/functions/src/string/overlay.rs
index 0ea5359e96..4a665dfde2 100644
--- a/datafusion/functions/src/string/overlay.rs
+++ b/datafusion/functions/src/string/overlay.rs
@@ -15,264 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
-use arrow::datatypes::DataType;
-
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-use datafusion_common::cast::{
- as_generic_string_array, as_int64_array, as_string_view_array,
-};
-use datafusion_common::{exec_err, Result};
-use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
-use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
-use datafusion_macros::user_doc;
-
-#[user_doc(
- doc_section(label = "String Functions"),
- description = "Returns the string which is replaced by another string from
the specified position and specified count length.",
- syntax_example = "overlay(str PLACING substr FROM pos [FOR count])",
- sql_example = r#"```sql
-> select overlay('Txxxxas' placing 'hom' from 2 for 4);
-+--------------------------------------------------------+
-| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) |
-+--------------------------------------------------------+
-| Thomas |
-+--------------------------------------------------------+
-```"#,
- standard_argument(name = "str", prefix = "String"),
- argument(name = "substr", description = "Substring to replace in str."),
- argument(
- name = "pos",
- description = "The start position to start the replace in str."
- ),
- argument(
- name = "count",
- description = "The count of characters to be replaced from start
position of str. If not specified, will use substr length instead."
- )
+#[deprecated(
+ note = "overlay has been moved to core. Update imports to use
core::overlay."
)]
-#[derive(Debug)]
-pub struct OverlayFunc {
- signature: Signature,
-}
-
-impl Default for OverlayFunc {
- fn default() -> Self {
- Self::new()
- }
-}
-
-impl OverlayFunc {
- pub fn new() -> Self {
- use DataType::*;
- Self {
- signature: Signature::one_of(
- vec![
- TypeSignature::Exact(vec![Utf8View, Utf8View, Int64,
Int64]),
- TypeSignature::Exact(vec![Utf8, Utf8, Int64, Int64]),
- TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64,
Int64]),
- TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
- TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
- TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
- ],
- Volatility::Immutable,
- ),
- }
- }
-}
-
-impl ScalarUDFImpl for OverlayFunc {
- fn as_any(&self) -> &dyn Any {
- self
- }
-
- fn name(&self) -> &str {
- "overlay"
- }
-
- fn signature(&self) -> &Signature {
- &self.signature
- }
-
- fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- utf8_to_str_type(&arg_types[0], "overlay")
- }
-
- fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
- match args.args[0].data_type() {
- DataType::Utf8View | DataType::Utf8 => {
- make_scalar_function(overlay::<i32>, vec![])(&args.args)
- }
- DataType::LargeUtf8 => {
- make_scalar_function(overlay::<i64>, vec![])(&args.args)
- }
- other => exec_err!("Unsupported data type {other:?} for function
overlay"),
- }
- }
-
- fn documentation(&self) -> Option<&Documentation> {
- self.doc()
- }
-}
-
-macro_rules! process_overlay {
- // For the three-argument case
- ($string_array:expr, $characters_array:expr, $pos_num:expr) => {{
- $string_array
- .iter()
- .zip($characters_array.iter())
- .zip($pos_num.iter())
- .map(|((string, characters), start_pos)| {
- match (string, characters, start_pos) {
- (Some(string), Some(characters), Some(start_pos)) => {
- let string_len = string.chars().count();
- let characters_len = characters.chars().count();
- let replace_len = characters_len as i64;
- let mut res =
- String::with_capacity(string_len.max(characters_len));
-
- //as sql replace index start from 1 while string index
start from 0
- if start_pos > 1 && start_pos - 1 < string_len as i64 {
- let start = (start_pos - 1) as usize;
- res.push_str(&string[..start]);
- }
- res.push_str(characters);
- // if start + replace_len - 1 >= string_length, just to
string end
- if start_pos + replace_len - 1 < string_len as i64 {
- let end = (start_pos + replace_len - 1) as usize;
- res.push_str(&string[end..]);
- }
- Ok(Some(res))
- }
- _ => Ok(None),
- }
- })
- .collect::<Result<GenericStringArray<T>>>()
- }};
-
- // For the four-argument case
- ($string_array:expr, $characters_array:expr, $pos_num:expr, $len_num:expr)
=> {{
- $string_array
- .iter()
- .zip($characters_array.iter())
- .zip($pos_num.iter())
- .zip($len_num.iter())
- .map(|(((string, characters), start_pos), len)| {
- match (string, characters, start_pos, len) {
- (Some(string), Some(characters), Some(start_pos), Some(len))
=> {
- let string_len = string.chars().count();
- let characters_len = characters.chars().count();
- let replace_len = len.min(string_len as i64);
- let mut res =
- String::with_capacity(string_len.max(characters_len));
-
- //as sql replace index start from 1 while string index
start from 0
- if start_pos > 1 && start_pos - 1 < string_len as i64 {
- let start = (start_pos - 1) as usize;
- res.push_str(&string[..start]);
- }
- res.push_str(characters);
- // if start + replace_len - 1 >= string_length, just to
string end
- if start_pos + replace_len - 1 < string_len as i64 {
- let end = (start_pos + replace_len - 1) as usize;
- res.push_str(&string[end..]);
- }
- Ok(Some(res))
- }
- _ => Ok(None),
- }
- })
- .collect::<Result<GenericStringArray<T>>>()
- }};
-}
-
-/// OVERLAY(string1 PLACING string2 FROM integer FOR integer2)
-/// Replaces a substring of string1 with string2 starting at the integer bit
-/// pgsql overlay('Txxxxas' placing 'hom' from 2 for 4) → Thomas
-/// overlay('Txxxxas' placing 'hom' from 2) -> Thomxas, without for option,
str2's len is instead
-fn overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
- let use_string_view = args[0].data_type() == &DataType::Utf8View;
- if use_string_view {
- string_view_overlay::<T>(args)
- } else {
- string_overlay::<T>(args)
- }
-}
-
-pub fn string_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
- match args.len() {
- 3 => {
- let string_array = as_generic_string_array::<T>(&args[0])?;
- let characters_array = as_generic_string_array::<T>(&args[1])?;
- let pos_num = as_int64_array(&args[2])?;
-
- let result = process_overlay!(string_array, characters_array,
pos_num)?;
- Ok(Arc::new(result) as ArrayRef)
- }
- 4 => {
- let string_array = as_generic_string_array::<T>(&args[0])?;
- let characters_array = as_generic_string_array::<T>(&args[1])?;
- let pos_num = as_int64_array(&args[2])?;
- let len_num = as_int64_array(&args[3])?;
-
- let result =
- process_overlay!(string_array, characters_array, pos_num,
len_num)?;
- Ok(Arc::new(result) as ArrayRef)
- }
- other => {
- exec_err!("overlay was called with {other} arguments. It requires
3 or 4.")
- }
- }
-}
-
-pub fn string_view_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
- match args.len() {
- 3 => {
- let string_array = as_string_view_array(&args[0])?;
- let characters_array = as_string_view_array(&args[1])?;
- let pos_num = as_int64_array(&args[2])?;
-
- let result = process_overlay!(string_array, characters_array,
pos_num)?;
- Ok(Arc::new(result) as ArrayRef)
- }
- 4 => {
- let string_array = as_string_view_array(&args[0])?;
- let characters_array = as_string_view_array(&args[1])?;
- let pos_num = as_int64_array(&args[2])?;
- let len_num = as_int64_array(&args[3])?;
-
- let result =
- process_overlay!(string_array, characters_array, pos_num,
len_num)?;
- Ok(Arc::new(result) as ArrayRef)
- }
- other => {
- exec_err!("overlay was called with {other} arguments. It requires
3 or 4.")
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use arrow::array::{Int64Array, StringArray};
-
- use super::*;
-
- #[test]
- fn to_overlay() -> Result<()> {
- let string =
- Arc::new(StringArray::from(vec!["123", "abcdefg", "xyz",
"Txxxxas"]));
- let replace_string =
- Arc::new(StringArray::from(vec!["abc", "qwertyasdfg", "ijk",
"hom"]));
- let start = Arc::new(Int64Array::from(vec![4, 1, 1, 2])); // start
- let end = Arc::new(Int64Array::from(vec![5, 7, 2, 4])); // replace len
-
- let res = overlay::<i32>(&[string, replace_string, start,
end]).unwrap();
- let result = as_generic_string_array::<i32>(&res).unwrap();
- let expected = StringArray::from(vec!["abc", "qwertyasdfg", "ijkz",
"Thomas"]);
- assert_eq!(&expected, result);
-
- Ok(())
- }
-}
+pub use crate::core::overlay::*;
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 60ecf7bd78..0f08934c8a 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -745,6 +745,7 @@ nvl2(expression1, expression2, expression3)
- [lpad](#lpad)
- [ltrim](#ltrim)
- [octet_length](#octet_length)
+- [overlay](#overlay)
- [position](#position)
- [repeat](#repeat)
- [replace](#replace)
@@ -1282,6 +1283,32 @@ octet_length(str)
- [bit_length](#bit_length)
- [length](#length)
+### `overlay`
+
+Returns a copy of the string in which the characters starting at the specified
position, for the specified count, are replaced by another string.
+
+```sql
+overlay(str PLACING substr FROM pos [FOR count])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **substr**: Substring to replace in str.
+- **pos**: The position in str at which the replacement starts.
+- **count**: The number of characters to replace, starting at pos.
If not specified, the length of substr is used instead.
+
+#### Example
+
+```sql
+> select overlay('Txxxxas' placing 'hom' from 2 for 4);
++--------------------------------------------------------+
+| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) |
++--------------------------------------------------------+
+| Thomas |
++--------------------------------------------------------+
+```
+
### `position`
_Alias of [strpos](#strpos)._
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]