alamb commented on code in PR #3570:
URL: https://github.com/apache/arrow-datafusion/pull/3570#discussion_r983488471


##########
datafusion/core/tests/sql/window.rs:
##########
@@ -578,6 +578,264 @@ async fn 
window_frame_rows_preceding_with_partition_unique_order_by() -> Result<
 //     Ok(())
 // }
 
+#[tokio::test]
+async fn window_frame_ranges_preceding_following_desc() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+    let sql = "SELECT \
+               SUM(c4) OVER(ORDER BY c2 DESC RANGE BETWEEN 1 PRECEDING AND 1 
FOLLOWING),\
+               SUM(c3) OVER(ORDER BY c2 DESC RANGE BETWEEN 10000 PRECEDING AND 
10000 FOLLOWING),\
+               COUNT(*) OVER(ORDER BY c2 DESC RANGE BETWEEN 1 PRECEDING AND 1 
FOLLOWING) \
+               FROM aggregate_test_100 \
+               ORDER BY c9 \
+               LIMIT 5";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        
"+----------------------------+----------------------------+-----------------+",
+        "| SUM(aggregate_test_100.c4) | SUM(aggregate_test_100.c3) | 
COUNT(UInt8(1)) |",
+        
"+----------------------------+----------------------------+-----------------+",
+        "| 52276                      | 781                        | 56        
      |",
+        "| 260620                     | 781                        | 63        
      |",
+        "| -28623                     | 781                        | 37        
      |",
+        "| 260620                     | 781                        | 63        
      |",
+        "| 260620                     | 781                        | 63        
      |",
+        
"+----------------------------+----------------------------+-----------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn window_frame_order_by_asc_desc_large() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+    let sql = "SELECT
+                SUM(c5) OVER (ORDER BY c2 ASC, c6 DESC)
+                FROM aggregate_test_100
+                LIMIT 5";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+----------------------------+",
+        "| SUM(aggregate_test_100.c5) |",
+        "+----------------------------+",
+        "| -1383162419                |",
+        "| -3265456275                |",
+        "| -3909681744                |",
+        "| -5241214934                |",
+        "| -4246910946                |",
+        "+----------------------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn window_frame_order_by_desc_large() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+    let sql = "SELECT
+                SUM(c5) OVER (ORDER BY c2 DESC, c6 ASC)
+                FROM aggregate_test_100
+                ORDER BY c9
+                LIMIT 5";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+----------------------------+",
+        "| SUM(aggregate_test_100.c5) |",
+        "+----------------------------+",
+        "| 11212193439                |",
+        "| 22799733943                |",
+        "| 2935356871                 |",
+        "| 15810962683                |",
+        "| 18035025006                |",
+        "+----------------------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn window_frame_order_by_null_timestamp_order_by() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_null_cases_csv(&ctx).await?;
+    let sql = "SELECT
+                SUM(c1) OVER (ORDER BY c2 DESC)

Review Comment:
   I think `sum` will produce the same value regardless of the window order --
perhaps using `first()` would add better coverage to these tests.



##########
datafusion/core/tests/sql/window.rs:
##########
@@ -765,3 +765,48 @@ async fn 
window_frame_ranges_unbounded_preceding_following_diff_col() -> Result<
     assert_batches_eq!(expected, &actual);
     Ok(())
 }
+
+#[tokio::test]
+async fn window_frame_ranges_unbounded_preceding_err() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+    // execute the query
+    let df = ctx
+        .sql(
+            "SELECT \
+               SUM(c2) OVER (ORDER BY c2 RANGE BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED PRECEDING), \
+               COUNT(*) OVER (ORDER BY c2 RANGE BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED PRECEDING) \
+               FROM aggregate_test_100 \
+               ORDER BY c9 \
+               LIMIT 5",
+        )
+        .await;
+    assert_eq!(
+        df.err().unwrap().to_string(),
+        "Execution error: Invalid window frame: end bound cannot be unbounded 
preceding"

Review Comment:
   👍 



##########
datafusion/physical-expr/src/aggregate/sum.rs:
##########
@@ -312,7 +326,11 @@ impl Accumulator for SumAccumulator {
     fn evaluate(&self) -> Result<ScalarValue> {
         // TODO: add the checker for overflow
         // For the decimal(precision,_) data type, the absolute of value must 
be less than 10^precision.
-        Ok(self.sum.clone())
+        if self.count == 0 {

Review Comment:
   It took me a while to understand why this `count` field is necessary. I am a
little worried that it might add overhead to the aggregate function (aka slow
down aggregates). However, we can always implement a special-case sum for
window functions if this becomes a problem.

   I think adding a comment explaining that the count is needed to distinguish
the case of "no values" when all values are removed via `remove_batches` would
help future readers.

   I did try commenting it out and there was a test diff, so 👍  for the
coverage.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to