Jefffrey commented on code in PR #17424: URL: https://github.com/apache/datafusion/pull/17424#discussion_r2361626934
########## datafusion/functions/src/datetime/make_interval.rs: ########## @@ -0,0 +1,605 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::utils::make_scalar_function; +use arrow::array::{Array, ArrayRef, IntervalMonthDayNanoBuilder, PrimitiveArray}; +use arrow::datatypes::DataType::Interval; +use arrow::datatypes::IntervalUnit::MonthDayNano; +use arrow::datatypes::{DataType, IntervalMonthDayNano}; +use datafusion_common::{ + exec_err, plan_datafusion_err, DataFusionError, Result, ScalarValue, +}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, +}; +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Construct an INTERVAL (MonthDayNano) from component parts. 
Missing arguments default to 0; if any provided argument is NULL on a row, the result is NULL.", + syntax_example = "make_interval([years[, months[, weeks[, days[, hours[, mins[, secs]]]]]])", + sql_example = r#"```sql +-- Inline example without creating a table +> SELECT + y, m, w, d, h, mi, s, + make_interval(y, m, w, d, h, mi, s) AS interval + FROM VALUES + (1, 1, 1, 1, 1, 1, 1.0) + AS v(y, m, w, d, h, mi, s); ++---+---+---+---+---+---+---+---------------------------------------------------+ +|y |m |w |d |h |mi |s |interval | ++---+---+---+---+---+---+---+---------------------------------------------------+ +|1 |1 |1 |1 |1 |1 |1.0|1 years 1 months 8 days 1 hours 1 minutes 1 seconds| ++---+---+---+---+---+---+---+---------------------------------------------------+ +```"#, + argument( + name = "years", + description = "Years to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "months", + description = "Months to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "weeks", + description = "Weeks to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "days", + description = "Days to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "hours", + description = "Hours to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "mins", + description = "Minutes to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." 
+ ), + argument( + name = "secs", + description = "Seconds to use when making the interval (may be fractional). Optional; defaults to 0. Must be finite (not NaN/±Inf). Can be a constant, column or function, and any combination of arithmetic operators." + ) +)] +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct MakeIntervalFunc { + signature: Signature, +} + +impl Default for MakeIntervalFunc { + fn default() -> Self { + Self::new() + } +} + +impl MakeIntervalFunc { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MakeIntervalFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "make_interval" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { + Ok(Interval(MonthDayNano)) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { + if args.args.is_empty() { + return Ok(ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano( + Some(IntervalMonthDayNano::new(0, 0, 0)), + ))); + } + make_scalar_function(make_interval_kernel, vec![])(&args.args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> { + let length = arg_types.len(); + match length { + x if x > 7 => { + exec_err!( + "make_interval expects between 0 and 7 arguments, got {}", + arg_types.len() + ) + } + _ => Ok((0..arg_types.len()) + .map(|i| { + if i == 6 { + DataType::Float64 + } else { + DataType::Int32 + } + }) + .collect()), + } + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +fn make_interval_kernel(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> { + use arrow::array::AsArray; + use arrow::datatypes::{Float64Type, Int32Type}; + + let n_rows = args[0].len(); + + let years = args[0] + .as_primitive_opt::<Int32Type>() + .ok_or_else(|| plan_datafusion_err!("make_interval arg[0] must be Int32"))?; + let months = 
args + .get(1) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[1] must be Int32") + }) + }) + .transpose()?; + let weeks = args + .get(2) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[2] must be Int32") + }) + }) + .transpose()?; + let days: Option<&PrimitiveArray<Int32Type>> = args + .get(3) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[3] must be Int32") + }) + }) + .transpose()?; + let hours: Option<&PrimitiveArray<Int32Type>> = args + .get(4) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[4] must be Int32") + }) + }) + .transpose()?; + let mins: Option<&PrimitiveArray<Int32Type>> = args + .get(5) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[5] must be Int32") + }) + }) + .transpose()?; + let secs: Option<&PrimitiveArray<Float64Type>> = args + .get(6) + .map(|a| { + a.as_primitive_opt::<Float64Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[6] must be Float64") + }) + }) + .transpose()?; + + let mut builder = IntervalMonthDayNanoBuilder::with_capacity(n_rows); + + for i in 0..n_rows { + // if one column is NULL → result NULL + let any_null_present = years.is_null(i) + || months.as_ref().is_some_and(|a| a.is_null(i)) + || weeks.as_ref().is_some_and(|a| a.is_null(i)) + || days.as_ref().is_some_and(|a| a.is_null(i)) + || hours.as_ref().is_some_and(|a| a.is_null(i)) + || mins.as_ref().is_some_and(|a| a.is_null(i)) + || secs.as_ref().is_some_and(|a| { + a.is_null(i) || !a.value(i).is_finite() || a.value(i).is_nan() + }); + + if any_null_present { + builder.append_null(); + continue; + } + + // default values 0 or 0.0 + let y = years.value(i); + let mo = months.as_ref().map_or(0, |a| a.value(i)); + let w = weeks.as_ref().map_or(0, |a| 
a.value(i)); + let d = days.as_ref().map_or(0, |a| a.value(i)); + let h = hours.as_ref().map_or(0, |a| a.value(i)); + let mi = mins.as_ref().map_or(0, |a| a.value(i)); + let s = secs.as_ref().map_or(0.0, |a| a.value(i)); + + match make_interval_month_day_nano(y, mo, w, d, h, mi, s)? { + Some(v) => builder.append_value(v), + None => { + builder.append_null(); + continue; + } + } + } + + Ok(Arc::new(builder.finish())) +} + +pub fn make_interval_month_day_nano( + year: i32, + month: i32, + week: i32, + day: i32, + hour: i32, + min: i32, + sec: f64, +) -> Result<Option<IntervalMonthDayNano>> { Review Comment: ```suggestion ) -> Option<IntervalMonthDayNano> { ``` Now that the overflow -> error flow is gone, can simplify this. ########## datafusion/functions/src/datetime/make_interval.rs: ########## @@ -0,0 +1,605 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::sync::Arc; + +use crate::utils::make_scalar_function; +use arrow::array::{Array, ArrayRef, IntervalMonthDayNanoBuilder, PrimitiveArray}; +use arrow::datatypes::DataType::Interval; +use arrow::datatypes::IntervalUnit::MonthDayNano; +use arrow::datatypes::{DataType, IntervalMonthDayNano}; +use datafusion_common::{ + exec_err, plan_datafusion_err, DataFusionError, Result, ScalarValue, +}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, +}; +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Construct an INTERVAL (MonthDayNano) from component parts. Missing arguments default to 0; if any provided argument is NULL on a row, the result is NULL.", + syntax_example = "make_interval([years[, months[, weeks[, days[, hours[, mins[, secs]]]]]])", + sql_example = r#"```sql +-- Inline example without creating a table +> SELECT + y, m, w, d, h, mi, s, + make_interval(y, m, w, d, h, mi, s) AS interval + FROM VALUES + (1, 1, 1, 1, 1, 1, 1.0) + AS v(y, m, w, d, h, mi, s); ++---+---+---+---+---+---+---+---------------------------------------------------+ +|y |m |w |d |h |mi |s |interval | ++---+---+---+---+---+---+---+---------------------------------------------------+ +|1 |1 |1 |1 |1 |1 |1.0|1 years 1 months 8 days 1 hours 1 minutes 1 seconds| ++---+---+---+---+---+---+---+---------------------------------------------------+ +```"#, + argument( + name = "years", + description = "Years to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "months", + description = "Months to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "weeks", + description = "Weeks to use when making the interval. 
Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "days", + description = "Days to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "hours", + description = "Hours to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "mins", + description = "Minutes to use when making the interval. Optional; defaults to 0. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "secs", + description = "Seconds to use when making the interval (may be fractional). Optional; defaults to 0. Must be finite (not NaN/±Inf). Can be a constant, column or function, and any combination of arithmetic operators." + ) +)] +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct MakeIntervalFunc { + signature: Signature, +} + +impl Default for MakeIntervalFunc { + fn default() -> Self { + Self::new() + } +} + +impl MakeIntervalFunc { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for MakeIntervalFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "make_interval" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { + Ok(Interval(MonthDayNano)) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { + if args.args.is_empty() { + return Ok(ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano( + Some(IntervalMonthDayNano::new(0, 0, 0)), + ))); + } + make_scalar_function(make_interval_kernel, vec![])(&args.args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> { + let length = 
arg_types.len(); + match length { + x if x > 7 => { + exec_err!( + "make_interval expects between 0 and 7 arguments, got {}", + arg_types.len() + ) + } + _ => Ok((0..arg_types.len()) + .map(|i| { + if i == 6 { + DataType::Float64 + } else { + DataType::Int32 + } + }) + .collect()), + } + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +fn make_interval_kernel(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> { + use arrow::array::AsArray; + use arrow::datatypes::{Float64Type, Int32Type}; + + let n_rows = args[0].len(); + + let years = args[0] + .as_primitive_opt::<Int32Type>() + .ok_or_else(|| plan_datafusion_err!("make_interval arg[0] must be Int32"))?; + let months = args + .get(1) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[1] must be Int32") + }) + }) + .transpose()?; + let weeks = args + .get(2) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[2] must be Int32") + }) + }) + .transpose()?; + let days: Option<&PrimitiveArray<Int32Type>> = args + .get(3) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[3] must be Int32") + }) + }) + .transpose()?; + let hours: Option<&PrimitiveArray<Int32Type>> = args + .get(4) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[4] must be Int32") + }) + }) + .transpose()?; + let mins: Option<&PrimitiveArray<Int32Type>> = args + .get(5) + .map(|a| { + a.as_primitive_opt::<Int32Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[5] must be Int32") + }) + }) + .transpose()?; + let secs: Option<&PrimitiveArray<Float64Type>> = args + .get(6) + .map(|a| { + a.as_primitive_opt::<Float64Type>().ok_or_else(|| { + plan_datafusion_err!("make_dt_interval arg[6] must be Float64") + }) + }) + .transpose()?; + + let mut builder = 
IntervalMonthDayNanoBuilder::with_capacity(n_rows); + + for i in 0..n_rows { + // if one column is NULL → result NULL + let any_null_present = years.is_null(i) + || months.as_ref().is_some_and(|a| a.is_null(i)) + || weeks.as_ref().is_some_and(|a| a.is_null(i)) + || days.as_ref().is_some_and(|a| a.is_null(i)) + || hours.as_ref().is_some_and(|a| a.is_null(i)) + || mins.as_ref().is_some_and(|a| a.is_null(i)) + || secs.as_ref().is_some_and(|a| { + a.is_null(i) || !a.value(i).is_finite() || a.value(i).is_nan() Review Comment: ```suggestion a.is_null(i) || !a.value(i).is_finite() ``` FYI: `is_finite()` already returns false for NaN (as well as ±Inf), so the separate `is_nan()` check is redundant. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org