avantgardnerio commented on code in PR #2797:
URL: https://github.com/apache/arrow-datafusion/pull/2797#discussion_r907718292


##########
datafusion/physical-expr/src/expressions/datetime.rs:
##########
@@ -86,76 +89,114 @@ impl PhysicalExpr for DateIntervalExpr {
         let dates = self.lhs.evaluate(batch)?;
         let intervals = self.rhs.evaluate(batch)?;
 
-        let interval = match intervals {
-            ColumnarValue::Scalar(interval) => match interval {
-                ScalarValue::IntervalDayTime(Some(interval)) => interval as 
i32,
-                ScalarValue::IntervalYearMonth(Some(_)) => {
-                    return Err(DataFusionError::Execution(
-                        "DateIntervalExpr does not support 
IntervalYearMonth".to_string(),
-                    ))
-                }
-                ScalarValue::IntervalMonthDayNano(Some(_)) => {
-                    return Err(DataFusionError::Execution(
-                        "DateIntervalExpr does not support 
IntervalMonthDayNano"
-                            .to_string(),
-                    ))
-                }
-                other => {
-                    return Err(DataFusionError::Execution(format!(
-                        "DateIntervalExpr does not support non-interval type 
{:?}",
-                        other
-                    )))
-                }
-            },
-            _ => {
-                return Err(DataFusionError::Execution(
-                    "Columnar execution is not yet supported for 
DateIntervalExpr"
-                        .to_string(),
-                ))
+        // Unwrap days since epoch
+        let operand = match dates {
+            ColumnarValue::Scalar(scalar) => scalar,
+            _ => Err(DataFusionError::Execution(
+                "Columnar execution is not yet supported for DateIntervalExpr"
+                    .to_string(),
+            ))?,
+        };
+
+        // Convert to NaiveDate
+        let epoch = NaiveDate::from_ymd(1970, 1, 1);
+        let prior = match operand {
+            ScalarValue::Date32(Some(date)) => {
+                epoch.add(chrono::Duration::days(date as i64))
             }
+            ScalarValue::Date64(Some(date)) => 
epoch.add(chrono::Duration::days(date)),
+            _ => Err(DataFusionError::Execution(format!(
+                "Invalid lhs type for DateIntervalExpr: {:?}",
+                operand
+            )))?,
         };
 
-        match dates {
-            ColumnarValue::Scalar(scalar) => match scalar {
-                ScalarValue::Date32(Some(date)) => match &self.op {
-                    Operator::Plus => 
Ok(ColumnarValue::Scalar(ScalarValue::Date32(
-                        Some(date + interval),
-                    ))),
-                    Operator::Minus => 
Ok(ColumnarValue::Scalar(ScalarValue::Date32(
-                        Some(date - interval),
-                    ))),
-                    _ => {
-                        // this should be unreachable because we check the 
operators in `try_new`
-                        Err(DataFusionError::Execution(
-                            "Invalid operator for 
DateIntervalExpr".to_string(),
-                        ))
-                    }
-                },
-                ScalarValue::Date64(Some(date)) => match &self.op {
-                    Operator::Plus => 
Ok(ColumnarValue::Scalar(ScalarValue::Date64(
-                        Some(date + interval as i64),
-                    ))),
-                    Operator::Minus => 
Ok(ColumnarValue::Scalar(ScalarValue::Date64(
-                        Some(date - interval as i64),
-                    ))),
-                    _ => {
-                        // this should be unreachable because we check the 
operators in `try_new`
-                        Err(DataFusionError::Execution(
-                            "Invalid operator for 
DateIntervalExpr".to_string(),
-                        ))
-                    }
-                },
-                _ => {
-                    // this should be unreachable because we check the types 
in `try_new`
-                    Err(DataFusionError::Execution(
-                        "Invalid lhs type for DateIntervalExpr".to_string(),
-                    ))
-                }
-            },
+        // Unwrap interval to add
+        let scalar = match &intervals {
+            ColumnarValue::Scalar(interval) => interval,
             _ => Err(DataFusionError::Execution(
                 "Columnar execution is not yet supported for DateIntervalExpr"
                     .to_string(),
-            )),
-        }
+            ))?,
+        };
+
+        // Negate for subtraction
+        let interval = match &scalar {
+            ScalarValue::IntervalDayTime(Some(interval)) => *interval,
+            ScalarValue::IntervalYearMonth(Some(interval)) => *interval as i64,
+            ScalarValue::IntervalMonthDayNano(Some(_interval)) => {
+                Err(DataFusionError::Execution(
+                    "DateIntervalExpr does not support 
IntervalMonthDayNano".to_string(),
+                ))?
+            }
+            other => Err(DataFusionError::Execution(format!(
+                "DateIntervalExpr does not support non-interval type {:?}",
+                other
+            )))?,
+        };
+        let interval = match &self.op {
+            Operator::Plus => interval,
+            Operator::Minus => -interval,
+            _ => {
+                // this should be unreachable because we check the operators 
in `try_new`
+                Err(DataFusionError::Execution(
+                    "Invalid operator for DateIntervalExpr".to_string(),
+                ))?
+            }
+        };
+
+        // Add interval
+        let posterior = match scalar {
+            ScalarValue::IntervalDayTime(Some(_)) => {
+                prior.add(chrono::Duration::days(interval))
+            }
+            ScalarValue::IntervalYearMonth(Some(_)) => {
+                let target = add_months(prior, interval);
+                let target_plus = add_months(target, 1);
+                let last_day = target_plus.sub(chrono::Duration::days(1));
+                let day = min(prior.day(), last_day.day());

Review Comment:
   When the type is `IntervalDayTime`, I believe I copied the behavior of the 
existing code, which grabbed a 32-bit number that appears to represent the 
number of days, based on this snippet: 
https://github.com/apache/arrow-datafusion/blob/7617d78809d4ff5bde31142e0744c70024e40635/datafusion/physical-expr/src/expressions/datetime.rs#L122
 (I assume there were tests around this; if so, they still pass).
   
   For `IntervalYearMonth` I guessed, and the documentation you linked confirms 
that the guess was correct (thank you!):
   ```
   YEAR_MONTH - Indicates the number of elapsed whole months
   ```
   
   I wasn't clear on what `IntervalMonthDayNano` meant, but now I am, thanks to 
your link! This PR leaves it unimplemented (no change in behavior). It isn't 
critical to passing the TPC-H benchmarks, so it wasn't a priority for me at 
this moment.
   
   As for the actual business logic of `IntervalYearMonth`, it is always stored 
as a number of months (1 year = 12 months), and it turns out that adding (and 
subtracting) months is not very straightforward and is not implemented in 
chrono (see 
https://stackoverflow.com/questions/64081289/how-do-i-add-a-month-to-a-chrono-naivedate
 ). When I looked into other implementations, they appeared to have issues 
(e.g. not supporting negative arguments), so I wrote this.
   
   Unfortunately, one can't just add a fixed unit of time (months have 
different numbers of days); one has to know which date is being added to in 
order to add properly (some Februaries have 29 days, others only 28). Observing 
Postgres behavior, the day of the month gets clamped (e.g. `2022-01-31` + 1 
month = `2022-02-28`), so this logic attempts to reproduce that behavior.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to