This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 32bb26dcdf Add `to_unixtime` function (#9077)
32bb26dcdf is described below
commit 32bb26dcdfa5b03ef2d29ff1ddd7f483304d6afa
Author: Reilly.tang <[email protected]>
AuthorDate: Sat Mar 9 06:12:32 2024 +0800
Add `to_unixtime` function (#9077)
* [task #5568] add_to_unixtime_function
Signed-off-by: tangruilin <[email protected]>
* Fix clippy
---------
Signed-off-by: tangruilin <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions/src/datetime/mod.rs | 10 ++-
datafusion/functions/src/datetime/to_unixtime.rs | 83 +++++++++++++++++++++++
datafusion/proto/proto/datafusion.proto | 2 +
datafusion/proto/src/generated/prost.rs | 1 +
datafusion/sqllogictest/test_files/timestamps.slt | 69 +++++++++++++++++++
5 files changed, 164 insertions(+), 1 deletion(-)
diff --git a/datafusion/functions/src/datetime/mod.rs
b/datafusion/functions/src/datetime/mod.rs
index 233e8b2cdb..6c9ea1944b 100644
--- a/datafusion/functions/src/datetime/mod.rs
+++ b/datafusion/functions/src/datetime/mod.rs
@@ -24,9 +24,11 @@ use datafusion_expr::ScalarUDF;
mod common;
mod to_date;
mod to_timestamp;
+mod to_unixtime;
// create UDFs
make_udf_function!(to_date::ToDateFunc, TO_DATE, to_date);
+make_udf_function!(to_unixtime::ToUnixtimeFunc, TO_UNIXTIME, to_unixtime);
make_udf_function!(to_timestamp::ToTimestampFunc, TO_TIMESTAMP, to_timestamp);
make_udf_function!(
to_timestamp::ToTimestampSecondsFunc,
@@ -68,7 +70,7 @@ pub mod expr_fn {
/// # use datafusion_expr::col;
/// # use datafusion::prelude::*;
/// # use datafusion_functions::expr_fn::to_date;
- ///
+ ///
/// // define a schema.
/// let schema = Arc::new(Schema::new(vec![Field::new("a",
DataType::Utf8, false)]));
///
@@ -105,6 +107,11 @@ pub mod expr_fn {
super::to_date().call(args)
}
+ #[doc = "converts a string and optional formats to a Unixtime"]
+ pub fn to_unixtime(args: Vec<Expr>) -> Expr {
+ super::to_unixtime().call(args)
+ }
+
#[doc = "converts a string and optional formats to a
`Timestamp(Nanoseconds, None)`"]
pub fn to_timestamp(args: Vec<Expr>) -> Expr {
super::to_timestamp().call(args)
@@ -135,6 +142,7 @@ pub mod expr_fn {
pub fn functions() -> Vec<Arc<ScalarUDF>> {
vec![
to_date(),
+ to_unixtime(),
to_timestamp(),
to_timestamp_seconds(),
to_timestamp_millis(),
diff --git a/datafusion/functions/src/datetime/to_unixtime.rs
b/datafusion/functions/src/datetime/to_unixtime.rs
new file mode 100644
index 0000000000..ed56cdf8d0
--- /dev/null
+++ b/datafusion/functions/src/datetime/to_unixtime.rs
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow::datatypes::{DataType, TimeUnit};
+
+use crate::datetime::common::*;
+use datafusion_common::{exec_err, Result};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+
+use super::to_timestamp::ToTimestampSecondsFunc;
+
+#[derive(Debug)]
+pub(super) struct ToUnixtimeFunc {
+ signature: Signature,
+}
+
+impl ToUnixtimeFunc {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::variadic_any(Volatility::Immutable),
+ }
+ }
+}
+
+impl ScalarUDFImpl for ToUnixtimeFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "to_unixtime"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+ Ok(DataType::Int64)
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.is_empty() {
+ return exec_err!("to_unixtime function requires 1 or more
arguments, got 0");
+ }
+
+ // validate that any args after the first one are Utf8
+ if args.len() > 1 {
+ validate_data_types(args, "to_unixtime")?;
+ }
+
+ match args[0].data_type() {
+ DataType::Int32 | DataType::Int64 | DataType::Null |
DataType::Float64 => {
+ args[0].cast_to(&DataType::Int64, None)
+ }
+ DataType::Date64 | DataType::Date32 | DataType::Timestamp(_, None)
=> args[0]
+ .cast_to(&DataType::Timestamp(TimeUnit::Second, None), None)?
+ .cast_to(&DataType::Int64, None),
+ DataType::Utf8 => ToTimestampSecondsFunc::new()
+ .invoke(args)?
+ .cast_to(&DataType::Int64, None),
+ other => {
+ exec_err!("Unsupported data type {:?} for function
to_unixtime", other)
+ }
+ }
+ }
+}
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index 6700877f26..faffe57c07 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -1,5 +1,6 @@
/*
+
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -684,6 +685,7 @@ enum ScalarFunction {
/// 135 is RegexpLike
ToChar = 136;
/// 137 was ToDate
+ /// 138 was ToUnixtime
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index c12d04f33f..4c4b17f1a8 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2773,6 +2773,7 @@ pub enum ScalarFunction {
/// / 135 is RegexpLike
///
/// / 137 was ToDate
+ /// / 138 was ToUnixtime
ToChar = 136,
}
impl ScalarFunction {
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index f0483aec89..0b8284a45a 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2682,3 +2682,72 @@ SELECT to_char(null, '%d-%m-%Y');
statement ok
drop table formats;
+
+##########
+## to_unixtime tests
+##########
+
+query I
+select to_unixtime('2020-09-08T12:00:00+00:00');
+----
+1599566400
+
+query I
+select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S',
'%+', '%m-%d-%Y %H:%M:%S%#z');
+----
+1673638290
+
+query I
+select to_unixtime('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f
%m-%d-%Y');
+----
+1684295940
+
+query I
+select to_unixtime(arrow_cast('2020-09-08T12:00:00+00:00', 'Date64'));
+----
+1599566400
+
+query I
+select to_unixtime(arrow_cast('2020-09-08', 'Date32'));
+----
+1599523200
+
+query I
+select to_unixtime(to_timestamp('2020-09-08'));
+----
+1599523200
+
+query I
+select to_unixtime(to_timestamp_seconds('2020-09-08'));
+----
+1599523200
+
+query I
+select to_unixtime(to_timestamp_millis('2020-09-08'));
+----
+1599523200
+
+query I
+select to_unixtime(to_timestamp_micros('2020-09-08'));
+----
+1599523200
+
+query I
+select to_unixtime(to_timestamp_nanos('2020-09-08'));
+----
+1599523200
+
+query I
+select to_unixtime(arrow_cast(1599523200, 'Int32'));
+----
+1599523200
+
+query I
+select to_unixtime(arrow_cast(1599523200, 'Int64'));
+----
+1599523200
+
+query I
+select to_unixtime(arrow_cast(1599523200.414, 'Float64'));
+----
+1599523200