This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 45ed5aa5d0 fix: `core_expressions` feature flag broken, move `overlay` into `core` functions (#15217)
45ed5aa5d0 is described below

commit 45ed5aa5d008f7334bf64efc708038cd0c22059e
Author: Shruti Sharma <[email protected]>
AuthorDate: Tue Mar 25 00:33:38 2025 +0530

    fix: `core_expressions` feature flag broken, move `overlay` into `core` functions (#15217)
    
    * fix: remove core_expressions, move overlay to core
    
    * fix license header
    
    * fix header
    
    * update doc
    
    * Remove CI reference
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 .github/workflows/rust.yml                         |   3 -
 datafusion/functions/Cargo.toml                    |   4 -
 datafusion/functions/src/core/mod.rs               |   7 +
 .../functions/src/{string => core}/overlay.rs      |   0
 datafusion/functions/src/core/planner.rs           |   2 +-
 datafusion/functions/src/lib.rs                    |   5 +-
 datafusion/functions/src/string/mod.rs             |   5 -
 datafusion/functions/src/string/overlay.rs         | 263 +--------------------
 docs/source/user-guide/sql/scalar_functions.md     |  27 +++
 9 files changed, 39 insertions(+), 277 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 599e6e3cc3..1e6cd97ace 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -235,9 +235,6 @@ jobs:
       #
       - name: Check datafusion-functions (no-default-features)
         run: cargo check --profile ci --no-default-features -p datafusion-functions
-      # Fails due https://github.com/apache/datafusion/issues/15207
-      #- name: Check datafusion-functions (core_expressions)
-      #  run: cargo check --profile ci --no-default-features -p 
datafusion-functions --features=core_expressions
       - name: Check datafusion-functions (crypto_expressions)
         run: cargo check --profile ci --no-default-features -p datafusion-functions --features=crypto_expressions
       - name: Check datafusion-functions (datetime_expressions)
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 07e4973675..31ff55121b 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -35,14 +35,11 @@ all-features = true
 workspace = true
 
 [features]
-# enable core functions
-core_expressions = []
 crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
 # enable datetime functions
 datetime_expressions = []
 # Enable encoding by default so the doctests work. In general don't automatically enable all packages.
 default = [
-    "core_expressions",
     "datetime_expressions",
     "encoding_expressions",
     "math_expressions",
@@ -146,7 +143,6 @@ required-features = ["math_expressions"]
 [[bench]]
 harness = false
 name = "nullif"
-required-features = ["core_expressions"]
 
 [[bench]]
 harness = false
diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs
index 425ce78dec..c6329b1ee0 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -32,6 +32,7 @@ pub mod named_struct;
 pub mod nullif;
 pub mod nvl;
 pub mod nvl2;
+pub mod overlay;
 pub mod planner;
 pub mod r#struct;
 pub mod union_extract;
@@ -42,6 +43,7 @@ make_udf_function!(arrow_cast::ArrowCastFunc, arrow_cast);
 make_udf_function!(nullif::NullIfFunc, nullif);
 make_udf_function!(nvl::NVLFunc, nvl);
 make_udf_function!(nvl2::NVL2Func, nvl2);
+make_udf_function!(overlay::OverlayFunc, overlay);
 make_udf_function!(arrowtypeof::ArrowTypeOfFunc, arrow_typeof);
 make_udf_function!(r#struct::StructFunc, r#struct);
 make_udf_function!(named_struct::NamedStructFunc, named_struct);
@@ -71,6 +73,10 @@ pub mod expr_fn {
         nvl2,
         "Returns value2 if value1 is not NULL; otherwise, it returns value3.",
         arg1 arg2 arg3
+    ),(
+        overlay,
+        "replace the substring of string that starts at the start'th character 
and extends for count characters with new substring",
+        args,
     ),(
         arrow_typeof,
         "Returns the Arrow type of the input expression.",
@@ -115,6 +121,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         arrow_cast(),
         nvl(),
         nvl2(),
+        overlay(),
         arrow_typeof(),
         named_struct(),
         // Note: most users invoke `get_field` indirectly via field access
diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/core/overlay.rs
similarity index 100%
copy from datafusion/functions/src/string/overlay.rs
copy to datafusion/functions/src/core/overlay.rs
diff --git a/datafusion/functions/src/core/planner.rs b/datafusion/functions/src/core/planner.rs
index 717a74797c..227e401156 100644
--- a/datafusion/functions/src/core/planner.rs
+++ b/datafusion/functions/src/core/planner.rs
@@ -60,7 +60,7 @@ impl ExprPlanner for CoreFunctionPlanner {
 
     fn plan_overlay(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> {
         Ok(PlannerResult::Planned(Expr::ScalarFunction(
-            ScalarFunction::new_udf(crate::string::overlay(), args),
+            ScalarFunction::new_udf(crate::core::overlay(), args),
         )))
     }
 
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 7753b9a6dc..b65c4c5432 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -100,10 +100,8 @@ pub mod string;
 make_stub_package!(string, "string_expressions");
 
 /// Core datafusion expressions
-/// Enabled via feature flag `core_expressions`
-#[cfg(feature = "core_expressions")]
+/// These are always available and not controlled by a feature flag
 pub mod core;
-make_stub_package!(core, "core_expressions");
 
 /// Date and time expressions.
 /// Contains functions such as to_timestamp
@@ -148,7 +146,6 @@ pub mod utils;
 
 /// Fluent-style API for creating `Expr`s
 pub mod expr_fn {
-    #[cfg(feature = "core_expressions")]
     pub use super::core::expr_fn::*;
     #[cfg(feature = "crypto_expressions")]
     pub use super::crypto::expr_fn::*;
diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs
index 442c055ac3..4c59e26444 100644
--- a/datafusion/functions/src/string/mod.rs
+++ b/datafusion/functions/src/string/mod.rs
@@ -55,7 +55,6 @@ make_udf_function!(levenshtein::LevenshteinFunc, levenshtein);
 make_udf_function!(ltrim::LtrimFunc, ltrim);
 make_udf_function!(lower::LowerFunc, lower);
 make_udf_function!(octet_length::OctetLengthFunc, octet_length);
-make_udf_function!(overlay::OverlayFunc, overlay);
 make_udf_function!(repeat::RepeatFunc, repeat);
 make_udf_function!(replace::ReplaceFunc, replace);
 make_udf_function!(rtrim::RtrimFunc, rtrim);
@@ -108,10 +107,6 @@ pub mod expr_fn {
         octet_length,
         "returns the number of bytes of a string",
         args
-    ),(
-        overlay,
-        "replace the substring of string that starts at the start'th character 
and extends for count characters with new substring",
-        args,
     ),(
         repeat,
         "Repeats the `string` to `n` times",
diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs
index 0ea5359e96..4a665dfde2 100644
--- a/datafusion/functions/src/string/overlay.rs
+++ b/datafusion/functions/src/string/overlay.rs
@@ -15,264 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
-use arrow::datatypes::DataType;
-
-use crate::utils::{make_scalar_function, utf8_to_str_type};
-use datafusion_common::cast::{
-    as_generic_string_array, as_int64_array, as_string_view_array,
-};
-use datafusion_common::{exec_err, Result};
-use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
-use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
-use datafusion_macros::user_doc;
-
-#[user_doc(
-    doc_section(label = "String Functions"),
-    description = "Returns the string which is replaced by another string from 
the specified position and specified count length.",
-    syntax_example = "overlay(str PLACING substr FROM pos [FOR count])",
-    sql_example = r#"```sql
-> select overlay('Txxxxas' placing 'hom' from 2 for 4);
-+--------------------------------------------------------+
-| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) |
-+--------------------------------------------------------+
-| Thomas                                                 |
-+--------------------------------------------------------+
-```"#,
-    standard_argument(name = "str", prefix = "String"),
-    argument(name = "substr", description = "Substring to replace in str."),
-    argument(
-        name = "pos",
-        description = "The start position to start the replace in str."
-    ),
-    argument(
-        name = "count",
-        description = "The count of characters to be replaced from start 
position of str. If not specified, will use substr length instead."
-    )
+#[deprecated(
+    note = "overlay has been moved to core. Update imports to use core::overlay."
 )]
-#[derive(Debug)]
-pub struct OverlayFunc {
-    signature: Signature,
-}
-
-impl Default for OverlayFunc {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl OverlayFunc {
-    pub fn new() -> Self {
-        use DataType::*;
-        Self {
-            signature: Signature::one_of(
-                vec![
-                    TypeSignature::Exact(vec![Utf8View, Utf8View, Int64, 
Int64]),
-                    TypeSignature::Exact(vec![Utf8, Utf8, Int64, Int64]),
-                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64, 
Int64]),
-                    TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
-                    TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
-                    TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
-                ],
-                Volatility::Immutable,
-            ),
-        }
-    }
-}
-
-impl ScalarUDFImpl for OverlayFunc {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn name(&self) -> &str {
-        "overlay"
-    }
-
-    fn signature(&self) -> &Signature {
-        &self.signature
-    }
-
-    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        utf8_to_str_type(&arg_types[0], "overlay")
-    }
-
-    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
-        match args.args[0].data_type() {
-            DataType::Utf8View | DataType::Utf8 => {
-                make_scalar_function(overlay::<i32>, vec![])(&args.args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(overlay::<i64>, vec![])(&args.args)
-            }
-            other => exec_err!("Unsupported data type {other:?} for function 
overlay"),
-        }
-    }
-
-    fn documentation(&self) -> Option<&Documentation> {
-        self.doc()
-    }
-}
-
-macro_rules! process_overlay {
-    // For the three-argument case
-    ($string_array:expr, $characters_array:expr, $pos_num:expr) => {{
-        $string_array
-        .iter()
-        .zip($characters_array.iter())
-        .zip($pos_num.iter())
-        .map(|((string, characters), start_pos)| {
-            match (string, characters, start_pos) {
-                (Some(string), Some(characters), Some(start_pos)) => {
-                    let string_len = string.chars().count();
-                    let characters_len = characters.chars().count();
-                    let replace_len = characters_len as i64;
-                    let mut res =
-                        String::with_capacity(string_len.max(characters_len));
-
-                    //as sql replace index start from 1 while string index 
start from 0
-                    if start_pos > 1 && start_pos - 1 < string_len as i64 {
-                        let start = (start_pos - 1) as usize;
-                        res.push_str(&string[..start]);
-                    }
-                    res.push_str(characters);
-                    // if start + replace_len - 1 >= string_length, just to 
string end
-                    if start_pos + replace_len - 1 < string_len as i64 {
-                        let end = (start_pos + replace_len - 1) as usize;
-                        res.push_str(&string[end..]);
-                    }
-                    Ok(Some(res))
-                }
-                _ => Ok(None),
-            }
-        })
-        .collect::<Result<GenericStringArray<T>>>()
-    }};
-
-    // For the four-argument case
-    ($string_array:expr, $characters_array:expr, $pos_num:expr, $len_num:expr) 
=> {{
-        $string_array
-        .iter()
-        .zip($characters_array.iter())
-        .zip($pos_num.iter())
-        .zip($len_num.iter())
-        .map(|(((string, characters), start_pos), len)| {
-            match (string, characters, start_pos, len) {
-                (Some(string), Some(characters), Some(start_pos), Some(len)) 
=> {
-                    let string_len = string.chars().count();
-                    let characters_len = characters.chars().count();
-                    let replace_len = len.min(string_len as i64);
-                    let mut res =
-                        String::with_capacity(string_len.max(characters_len));
-
-                    //as sql replace index start from 1 while string index 
start from 0
-                    if start_pos > 1 && start_pos - 1 < string_len as i64 {
-                        let start = (start_pos - 1) as usize;
-                        res.push_str(&string[..start]);
-                    }
-                    res.push_str(characters);
-                    // if start + replace_len - 1 >= string_length, just to 
string end
-                    if start_pos + replace_len - 1 < string_len as i64 {
-                        let end = (start_pos + replace_len - 1) as usize;
-                        res.push_str(&string[end..]);
-                    }
-                    Ok(Some(res))
-                }
-                _ => Ok(None),
-            }
-        })
-        .collect::<Result<GenericStringArray<T>>>()
-    }};
-}
-
-/// OVERLAY(string1 PLACING string2 FROM integer FOR integer2)
-/// Replaces a substring of string1 with string2 starting at the integer bit
-/// pgsql overlay('Txxxxas' placing 'hom' from 2 for 4) → Thomas
-/// overlay('Txxxxas' placing 'hom' from 2) -> Thomxas, without for option, 
str2's len is instead
-fn overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let use_string_view = args[0].data_type() == &DataType::Utf8View;
-    if use_string_view {
-        string_view_overlay::<T>(args)
-    } else {
-        string_overlay::<T>(args)
-    }
-}
-
-pub fn string_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
-    match args.len() {
-        3 => {
-            let string_array = as_generic_string_array::<T>(&args[0])?;
-            let characters_array = as_generic_string_array::<T>(&args[1])?;
-            let pos_num = as_int64_array(&args[2])?;
-
-            let result = process_overlay!(string_array, characters_array, 
pos_num)?;
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        4 => {
-            let string_array = as_generic_string_array::<T>(&args[0])?;
-            let characters_array = as_generic_string_array::<T>(&args[1])?;
-            let pos_num = as_int64_array(&args[2])?;
-            let len_num = as_int64_array(&args[3])?;
-
-            let result =
-                process_overlay!(string_array, characters_array, pos_num, 
len_num)?;
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => {
-            exec_err!("overlay was called with {other} arguments. It requires 
3 or 4.")
-        }
-    }
-}
-
-pub fn string_view_overlay<T: OffsetSizeTrait>(args: &[ArrayRef]) -> 
Result<ArrayRef> {
-    match args.len() {
-        3 => {
-            let string_array = as_string_view_array(&args[0])?;
-            let characters_array = as_string_view_array(&args[1])?;
-            let pos_num = as_int64_array(&args[2])?;
-
-            let result = process_overlay!(string_array, characters_array, 
pos_num)?;
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        4 => {
-            let string_array = as_string_view_array(&args[0])?;
-            let characters_array = as_string_view_array(&args[1])?;
-            let pos_num = as_int64_array(&args[2])?;
-            let len_num = as_int64_array(&args[3])?;
-
-            let result =
-                process_overlay!(string_array, characters_array, pos_num, 
len_num)?;
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => {
-            exec_err!("overlay was called with {other} arguments. It requires 
3 or 4.")
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow::array::{Int64Array, StringArray};
-
-    use super::*;
-
-    #[test]
-    fn to_overlay() -> Result<()> {
-        let string =
-            Arc::new(StringArray::from(vec!["123", "abcdefg", "xyz", 
"Txxxxas"]));
-        let replace_string =
-            Arc::new(StringArray::from(vec!["abc", "qwertyasdfg", "ijk", 
"hom"]));
-        let start = Arc::new(Int64Array::from(vec![4, 1, 1, 2])); // start
-        let end = Arc::new(Int64Array::from(vec![5, 7, 2, 4])); // replace len
-
-        let res = overlay::<i32>(&[string, replace_string, start, 
end]).unwrap();
-        let result = as_generic_string_array::<i32>(&res).unwrap();
-        let expected = StringArray::from(vec!["abc", "qwertyasdfg", "ijkz", 
"Thomas"]);
-        assert_eq!(&expected, result);
-
-        Ok(())
-    }
-}
+pub use crate::core::overlay::*;
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 60ecf7bd78..0f08934c8a 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -745,6 +745,7 @@ nvl2(expression1, expression2, expression3)
 - [lpad](#lpad)
 - [ltrim](#ltrim)
 - [octet_length](#octet_length)
+- [overlay](#overlay)
 - [position](#position)
 - [repeat](#repeat)
 - [replace](#replace)
@@ -1282,6 +1283,32 @@ octet_length(str)
 - [bit_length](#bit_length)
 - [length](#length)
 
+### `overlay`
+
+Returns the string which is replaced by another string from the specified position and specified count length.
+
+```sql
+overlay(str PLACING substr FROM pos [FOR count])
+```
+
+#### Arguments
+
+- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+- **substr**: Substring to replace in str.
+- **pos**: The start position to start the replace in str.
+- **count**: The count of characters to be replaced from start position of str. If not specified, will use substr length instead.
+
+#### Example
+
+```sql
+> select overlay('Txxxxas' placing 'hom' from 2 for 4);
++--------------------------------------------------------+
+| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) |
++--------------------------------------------------------+
+| Thomas                                                 |
++--------------------------------------------------------+
+```
+
 ### `position`
 
 _Alias of [strpos](#strpos)._


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to