alamb commented on code in PR #18317:
URL: https://github.com/apache/datafusion/pull/18317#discussion_r2475037700


##########
datafusion/functions-nested/src/range.rs:
##########
@@ -378,44 +420,45 @@ impl Range {
 
     fn gen_range_timestamp(&self, args: &[ArrayRef]) -> Result<ArrayRef> {
         let [start, stop, step] = take_function_args(self.name(), args)?;
+        let step = as_interval_mdn_array(step)?;
+
+        // Signature can only guarantee we get a timestamp type, not 
specifically
+        // timestamp(ns) so handle potential cast from other timestamps here.
+        fn cast_to_ns(arr: &ArrayRef) -> Result<ArrayRef> {

Review Comment:
   given `cast` already checks for the same type, I wonder if this is 
necessary? Or is the idea to cast to nanoseconds and maintain the timezone? 



##########
datafusion/functions-nested/src/range.rs:
##########
@@ -378,44 +420,45 @@ impl Range {
 
     fn gen_range_timestamp(&self, args: &[ArrayRef]) -> Result<ArrayRef> {
         let [start, stop, step] = take_function_args(self.name(), args)?;
+        let step = as_interval_mdn_array(step)?;
+
+        // Signature can only guarantee we get a timestamp type, not 
specifically
+        // timestamp(ns) so handle potential cast from other timestamps here.
+        fn cast_to_ns(arr: &ArrayRef) -> Result<ArrayRef> {
+            match arr.data_type() {
+                DataType::Timestamp(TimeUnit::Nanosecond, _) => 
Ok(Arc::clone(arr)),
+                DataType::Timestamp(_, tz) => Ok(cast(
+                    arr,
+                    &DataType::Timestamp(TimeUnit::Nanosecond, tz.to_owned()),
+                )?),
+                _ => unreachable!(),
+            }
+        }
+        let start = cast_to_ns(start)?;
+        let start = as_timestamp_nanosecond_array(&start)?;
+        let stop = cast_to_ns(stop)?;
+        let stop = as_timestamp_nanosecond_array(&stop)?;

Review Comment:
   I agree that would be nicer but I think it could also be done as a follow on 
PR



##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -7131,9 +7142,18 @@ select generate_series(arrow_cast('2021-01-01T00:00:00', 
'Timestamp(Nanosecond,
 [2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 
2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 
2021-01-01T05:59:38-05:00]
 
 ## mixing types for timestamps is not supported
-query error DataFusion error: Internal error: Unexpected argument type for 
generate_series : Date32
+query error DataFusion error: Error during planning: Internal error: Function 
'generate_series' failed to match any signature
 select generate_series(arrow_cast('2021-01-01T00:00:00', 
'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR);
 
+## mixing types not allowed even if an argument is null
+query error DataFusion error: Error during planning: Internal error: Function 
'generate_series' failed to match any signature
+select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL);
+
+query error DataFusion error: Error during planning: Internal error: Function 
'generate_series' failed to match any signature
+select generate_series(1, '2024-01-01', '2025-01-02');
+
+query error DataFusion error: Error during planning: Internal error: Function 
'generate_series' failed to match any signature
+select generate_series('2024-01-01'::timestamp, '2025-01-02', interval '1 
day');

Review Comment:
   Yes, I agree we should fix it in the signature related code
   
   I think it would also be ideal if we can provide more details on what went 
wrong and how we can fix it (aka as a user who doesn't understand the internal 
implementation, if I get this error message, how do I fix it?)



##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -6938,13 +6938,13 @@ select range(5),
        range(2, 5),
        range(2, 10, 3),
        range(10, 2, -3),
-       range(1, 5, -1),
-       range(1, -5, 1),
-       range(1, -5, -1),
-       range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH),
-       range(DATE '1993-02-01', DATE '1993-01-01', INTERVAL '-1' DAY),
-       range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '1' YEAR),
-       range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR)
+       range(arrow_cast(1, 'Int8'), 5, -1),
+       range(arrow_cast(1, 'Int16'), arrow_cast(-5, 'Int8'), 1),
+       range(arrow_cast(1, 'Int32'), arrow_cast(-5, 'Int16'), arrow_cast(-1, 
'Int8')),
+       range(DATE '1992-09-01', DATE '1993-03-01', arrow_cast('1 MONTH', 
'Interval(YearMonth)')),
+       range(DATE '1993-02-01', arrow_cast(DATE '1993-01-01', 'Date64'), 
INTERVAL '-1' DAY),
+       range(arrow_cast(DATE '1989-04-01', 'Date64'), DATE '1993-03-01', 
INTERVAL '1' YEAR),
+       range(arrow_cast(DATE '1993-03-01', 'Date64'), arrow_cast(DATE 
'1989-04-01', 'Date64'), INTERVAL '1' YEAR)

Review Comment:
   I recommend making this more explicit by:
   1. Revert the  change to the existing tests
   2. Add a new query right below this one that adds the casts to the other 
integer types



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to