This is an automated email from the ASF dual-hosted git repository.

ytyou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2ff02b25ec feat(spark): implement Spark datetime function last_day 
(#16828)
2ff02b25ec is described below

commit 2ff02b25ec4b5341f40daae1421666709b7b4f1d
Author: Alan Tang <[email protected]>
AuthorDate: Sat Jul 26 12:57:52 2025 +0800

    feat(spark): implement Spark datetime function last_day (#16828)
    
    * feat(spark): implement Spark datetime function last_day
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * chore: fix the export function name
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * chore: Fix Cargo.toml formatting
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * test: add more tests for spark function last_day
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * feat(spark): set the signature to be taking exactly one Date32 type
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * test(spark): add more bad cases
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * chore: clean up redundant package
    
    Signed-off-by: Alan Tang <[email protected]>
    
    ---------
    
    Signed-off-by: Alan Tang <[email protected]>
---
 Cargo.lock                                         |   1 +
 datafusion/spark/Cargo.toml                        |   1 +
 datafusion/spark/src/function/datetime/last_day.rs | 125 +++++++++++++++++++++
 datafusion/spark/src/function/datetime/mod.rs      |  17 ++-
 .../test_files/spark/datetime/last_day.slt         | 100 ++++++++++++++++-
 5 files changed, 238 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 320819c5ec..200a2bd5bf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2608,6 +2608,7 @@ name = "datafusion-spark"
 version = "49.0.0"
 dependencies = [
  "arrow",
+ "chrono",
  "criterion",
  "datafusion-catalog",
  "datafusion-common",
diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml
index 9243103fff..bc7ae380f7 100644
--- a/datafusion/spark/Cargo.toml
+++ b/datafusion/spark/Cargo.toml
@@ -37,6 +37,7 @@ name = "datafusion_spark"
 
 [dependencies]
 arrow = { workspace = true }
+chrono = { workspace = true }
 datafusion-catalog = { workspace = true }
 datafusion-common = { workspace = true }
 datafusion-execution = { workspace = true }
diff --git a/datafusion/spark/src/function/datetime/last_day.rs 
b/datafusion/spark/src/function/datetime/last_day.rs
new file mode 100644
index 0000000000..5a748816f4
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/last_day.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{ArrayRef, AsArray, Date32Array};
+use arrow::datatypes::{DataType, Date32Type};
+use chrono::{Datelike, Duration, NaiveDate};
+use datafusion_common::{exec_datafusion_err, internal_err, Result, 
ScalarValue};
+use datafusion_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+
+#[derive(Debug)]
+pub struct SparkLastDay {
+    signature: Signature,
+}
+
+impl Default for SparkLastDay {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkLastDay {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::exact(vec![DataType::Date32], 
Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkLastDay {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "last_day"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Date32)
+    }
+
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
+        let ScalarFunctionArgs { args, .. } = args;
+        let [arg] = args.as_slice() else {
+            return internal_err!(
+                "Spark `last_day` function requires 1 argument, got {}",
+                args.len()
+            );
+        };
+        match arg {
+            ColumnarValue::Scalar(ScalarValue::Date32(days)) => {
+                if let Some(days) = days {
+                    Ok(ColumnarValue::Scalar(ScalarValue::Date32(Some(
+                        spark_last_day(*days)?,
+                    ))))
+                } else {
+                    Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
+                }
+            }
+            ColumnarValue::Array(array) => {
+                let result = match array.data_type() {
+                    DataType::Date32 => {
+                        let result: Date32Array = array
+                            .as_primitive::<Date32Type>()
+                            .try_unary(spark_last_day)?
+                            .with_data_type(DataType::Date32);
+                        Ok(Arc::new(result) as ArrayRef)
+                    }
+                    other => {
+                        internal_err!("Unsupported data type {other:?} for 
Spark function `last_day`")
+                    }
+                }?;
+                Ok(ColumnarValue::Array(result))
+            }
+            other => {
+                internal_err!("Unsupported arg {other:?} for Spark function 
`last_day`")
+            }
+        }
+    }
+}
+
+fn spark_last_day(days: i32) -> Result<i32> {
+    let date = Date32Type::to_naive_date(days);
+
+    let (year, month) = (date.year(), date.month());
+    let (next_year, next_month) = if month == 12 {
+        (year + 1, 1)
+    } else {
+        (year, month + 1)
+    };
+
+    let first_day_next_month = NaiveDate::from_ymd_opt(next_year, next_month, 
1)
+        .ok_or_else(|| {
+            exec_datafusion_err!(
+                "Spark `last_day`: Unable to parse date from {next_year}, 
{next_month}, 1"
+            )
+        })?;
+
+    Ok(Date32Type::from_naive_date(
+        first_day_next_month - Duration::days(1),
+    ))
+}
diff --git a/datafusion/spark/src/function/datetime/mod.rs 
b/datafusion/spark/src/function/datetime/mod.rs
index a87df9a2c8..3bde960ae0 100644
--- a/datafusion/spark/src/function/datetime/mod.rs
+++ b/datafusion/spark/src/function/datetime/mod.rs
@@ -15,11 +15,24 @@
 // specific language governing permissions and limitations
 // under the License.
 
+pub mod last_day;
+
 use datafusion_expr::ScalarUDF;
+use datafusion_functions::make_udf_function;
 use std::sync::Arc;
 
-pub mod expr_fn {}
+make_udf_function!(last_day::SparkLastDay, last_day);
+
+pub mod expr_fn {
+    use datafusion_functions::export_functions;
+
+    export_functions!((
+        last_day,
+        "Returns the last day of the month which the date belongs to.",
+        arg1
+    ));
+}
 
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
-    vec![]
+    vec![last_day()]
 }
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt 
b/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt
index 29fb9ca11b..da3dd9711b 100644
--- a/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt
+++ b/datafusion/sqllogictest/test_files/spark/datetime/last_day.slt
@@ -21,7 +21,99 @@
 # For more information, please see:
 #   https://github.com/apache/datafusion/issues/15914
 
-## Original Query: SELECT last_day('2009-01-12');
-## PySpark 3.5.5 Result: {'last_day(2009-01-12)': datetime.date(2009, 1, 31), 
'typeof(last_day(2009-01-12))': 'date', 'typeof(2009-01-12)': 'string'}
-#query
-#SELECT last_day('2009-01-12'::string);
+query D
+SELECT last_day('2009-01-12'::DATE);
+----
+2009-01-31
+
+
+query D
+SELECT last_day('2015-02-28'::DATE);
+----
+2015-02-28
+
+query D
+SELECT last_day('2015-03-27'::DATE);
+----
+2015-03-31
+
+query D
+SELECT last_day('2015-04-26'::DATE);
+----
+2015-04-30
+
+query D
+SELECT last_day('2015-05-25'::DATE);
+----
+2015-05-31
+
+query D
+SELECT last_day('2015-06-24'::DATE);
+----
+2015-06-30
+
+query D
+SELECT last_day('2015-07-23'::DATE);
+----
+2015-07-31
+
+query D
+SELECT last_day('2015-08-01'::DATE);
+----
+2015-08-31
+
+query D
+SELECT last_day('2015-09-02'::DATE);
+----
+2015-09-30
+
+query D
+SELECT last_day('2015-10-03'::DATE);
+----
+2015-10-31
+
+query D
+SELECT last_day('2015-11-04'::DATE);
+----
+2015-11-30
+
+query D
+SELECT last_day('2015-12-05'::DATE);
+----
+2015-12-31
+
+
+query D
+SELECT last_day('2016-01-06'::DATE);
+----
+2016-01-31
+
+query D
+SELECT last_day('2016-02-07'::DATE);
+----
+2016-02-29
+
+
+query D
+SELECT last_day(null::DATE);
+----
+NULL
+
+
+statement error Failed to coerce arguments to satisfy a call to 'last_day' 
function
+select last_day('foo');
+
+
+statement error Failed to coerce arguments to satisfy a call to 'last_day' 
function
+select last_day(123);
+
+
+statement error 'last_day' does not support zero arguments
+select last_day();
+
+statement error Failed to coerce arguments to satisfy a call to 'last_day' 
function
+select last_day(last_day('2016-02-07'::string, 'foo'));
+
+statement error Failed to coerce arguments to satisfy a call to 'last_day' 
function
+select last_day(last_day('2016-02-31'::string));
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to