alamb commented on code in PR #9040:
URL: https://github.com/apache/arrow-datafusion/pull/9040#discussion_r1470271635


##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -497,6 +502,99 @@ pub fn make_current_time(
     move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
 }
 
+/// make_date(year, month, date) SQL function implementation
+pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 3 {
+        return exec_err!(
+            "make_date function requires 3 arguments, got {}",
+            args.len()
+        );
+    }
+
+    // first, identify if any of the arguments is an Array. If yes, store its 
`len`,

Review Comment:
   I think you can use `columnar_values_to_array` for this, which was recently 
added by @viirya  for just this purpose
   
   
https://github.com/apache/arrow-datafusion/blob/851bc7de850d0261202b18641561c8bd09abe588/datafusion-examples/examples/simple_udf.rs#L74



##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -2233,4 +2233,168 @@ SELECT val, ts1 - ts2 AS ts_diff FROM table_a ORDER BY 
ts2 - ts1
 1 0 days -1 hours 0 mins 0.000000000 secs
 2 0 days -1 hours 0 mins 0.000000000 secs
 
+##########
+## make date tests
+##########
+
+query D
+select make_date(2024, 1, 27);
+----
+2024-01-27
+
+query D
+select make_date(42, 1, 27);
+----
+0042-01-27
+
+query D
+select make_date(99, 1, 27);
+----
+0099-01-27
+
+query D
+select make_date(2024, 2, 29);
+----
+2024-02-29
+
+query D
+select make_date(10001, 1, 27);
+----
++10001-01-27
+
+query D
+select make_date('2024', '01', '27');
+----
+2024-01-27
+
+query D
+select make_date(12 + 2012, '01', '27');
+----
+2024-01-27
+
+query D
+select make_date(2024::bigint, 01::bigint, 27::bigint);
+----
+2024-01-27
+
+query D
+select make_date(arrow_cast(2024, 'Int64'), arrow_cast(1, 'Int64'), 
arrow_cast(27, 'Int64'));
+----
+2024-01-27
+
+query D
+select make_date(arrow_cast(2024, 'Int32'), arrow_cast(1, 'Int32'), 
arrow_cast(27, 'Int32'));
+----
+2024-01-27
+
+statement ok
+create table table_nums (year int, month int, day int) as values
+    (2024, 1, 23),
+    (2023, 11, 30);
+
+query D
+select make_date(t.year, t.month, t.day) from table_nums t;
+----
+2024-01-23
+2023-11-30
+
+query D
+select make_date(2021, t.month, t.day) from table_nums t;
+----
+2021-01-23
+2021-11-30
+
+query D
+select make_date(t.year, 3, t.day) from table_nums t;
+----
+2024-03-23
+2023-03-30
+
+query D
+select make_date(t.year, t.month, 4) from table_nums t;
+----
+2024-01-04
+2023-11-04
+
+query D
+select make_date('2021', t.month, t.day) from table_nums t;
+----
+2021-01-23
+2021-11-30
+
+query D
+select make_date(t.year, '3', t.day) from table_nums t;
+----
+2024-03-23
+2023-03-30
+
+query D
+select make_date(t.year, t.month, '4') from table_nums t;
+----
+2024-01-04
+2023-11-04
+
+statement ok
+insert into table_nums values (2024, null, 23);
+
+query error DataFusion error: Execution error: Unable to parse date from 2024, 
0, 23
+select make_date(t.year, t.month, t.day) from table_nums t;
+
+statement ok
+drop table table_nums;
+
+statement ok
+create table table_strings (year varchar(4), month varchar(2), day varchar(2)) 
as values
+    ('2024', '1', '23'),
+    ('2023', '11', '30');
+
+query D
+select make_date(t.year, t.month, t.day) from table_strings t;
+----
+2024-01-23
+2023-11-30
+
+statement ok
+insert into table_strings values (2024, null, 23);
+
+query error DataFusion error: Execution error: Unable to parse date from 2024, 
0, 23
+select make_date(t.year, t.month, t.day) from table_strings t;
+
+statement ok
+drop table table_strings;
+
+query error DataFusion error: Execution error: Unable to parse date from 2024, 
13, 23

Review Comment:
   😍  -- 💯  for test coverage



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -497,6 +502,99 @@ pub fn make_current_time(
     move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
 }
 
+/// make_date(year, month, date) SQL function implementation
+pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 3 {
+        return exec_err!(
+            "make_date function requires 3 arguments, got {}",
+            args.len()
+        );
+    }
+
+    // first, identify if any of the arguments is an Array. If yes, store its 
`len`,
+    // as any scalar will need to be converted to an array of len `len`.
+    let len = args
+        .iter()
+        .fold(Option::<usize>::None, |acc, arg| match arg {
+            ColumnarValue::Scalar(_) => acc,
+            ColumnarValue::Array(a) => Some(a.len()),
+        });
+
+    let is_scalar = len.is_none();
+    let array_size = if is_scalar { 1 } else { len.unwrap() };
+
+    let years = cast_column(&args[0], &DataType::Int32, None)?;
+    let months = cast_column(&args[1], &DataType::Int32, None)?;
+    let days = cast_column(&args[2], &DataType::Int32, None)?;
+
+    let value_fn = |col: &ColumnarValue, pos: usize| -> Result<i32> {
+        match col {
+            ColumnarValue::Array(a) => 
Ok(a.as_primitive::<Int32Type>().value(pos)),
+            ColumnarValue::Scalar(s) => match s {
+                ScalarValue::Int32(i) => match i {
+                    Some(i) => Ok(*i),
+                    None => {
+                        exec_err!("Unable to parse date from null/empty value")
+                    }
+                },
+                _ => unreachable!(),
+            },
+        }
+    };
+
+    // since the epoch for the date32 datatype is the unix epoch
+    // we need to subtract the unix epoch from the current date
+    // note this can result in a negative value
+    let unix_days_from_ce = NaiveDate::from_ymd_opt(1970, 1, 1)
+        .unwrap()
+        .num_days_from_ce();
+
+    let mut builder: PrimitiveBuilder<Date32Type> = 
PrimitiveArray::builder(array_size);
+
+    for i in 0..array_size {
+        let y = value_fn(&years, i)?;
+        let m = u32::try_from(value_fn(&months, i)?);
+        let d = u32::try_from(value_fn(&days, i)?);
+
+        if m.is_err() {
+            return exec_err!(
+                "Month value '{:?}' is out of range",
+                value_fn(&months, i).unwrap()
+            );
+        }

Review Comment:
   I think you can write this logic like so to avoid the unwraps below:
   
   ```suggestion
           let Ok(m) = m else {
               return exec_err!(
                   "Month value '{:?}' is out of range",
                   value_fn(&months, i).unwrap()
               );
           };
   ```



##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -497,6 +502,99 @@ pub fn make_current_time(
     move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
 }
 
+/// make_date(year, month, date) SQL function implementation
+pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 3 {
+        return exec_err!(
+            "make_date function requires 3 arguments, got {}",
+            args.len()
+        );
+    }
+
+    // first, identify if any of the arguments is an Array. If yes, store its 
`len`,
+    // as any scalar will need to be converted to an array of len `len`.
+    let len = args
+        .iter()
+        .fold(Option::<usize>::None, |acc, arg| match arg {
+            ColumnarValue::Scalar(_) => acc,
+            ColumnarValue::Array(a) => Some(a.len()),
+        });
+
+    let is_scalar = len.is_none();
+    let array_size = if is_scalar { 1 } else { len.unwrap() };
+
+    let years = cast_column(&args[0], &DataType::Int32, None)?;
+    let months = cast_column(&args[1], &DataType::Int32, None)?;
+    let days = cast_column(&args[2], &DataType::Int32, None)?;
+
+    let value_fn = |col: &ColumnarValue, pos: usize| -> Result<i32> {

Review Comment:
   This could probably be implemented more efficiently with some sort of 
iterator that wrapped a `ColumnarValue` and returned i32s  -- that way you 
could iterate over all three arrays with `zip` and not have to do the downcast 
eah time, etc
   
   -- and then you could generate specialized implementations for 
ColumnarValue::Array and ColumnarValue::Scalar
   
   However, given `make_date` is likely to be run with all constants I don't 
think there is any pressing need to optimize the implementation unless someone 
has a particular need for a faster implementation



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to