kumarUjjawal commented on code in PR #21926:
URL: https://github.com/apache/datafusion/pull/21926#discussion_r3231849496
##########
datafusion/sqllogictest/test_files/aggregate.slt:
##########
@@ -3725,6 +3725,144 @@ select var_pop(c1), arrow_typeof(var_pop(c1)) from t;
statement ok
drop table t;
+# aggregate variance decimal128
+statement ok
+create table t_decimal_stats (g int, a decimal(10, 2)) as values
+ (1, 1), (1, 2), (1, 3), (2, 4), (2, null), (2, 5);
+
+query RRRRT
+select var(a), var_pop(a), stddev(a), stddev_pop(a), arrow_typeof(var(a)) from
t_decimal_stats;
+----
+2.5 2 1.581138830084 1.414213562373 Float64
+
+query IRRRR
+select g, var(a), var_pop(a), stddev(a), stddev_pop(a) from t_decimal_stats
group by g order by g;
+----
+1 1 0.666666666667 1 0.816496580928
+2 0.5 0.25 0.707106781187 0.5
+
+query R
+select var(a) over() from t_decimal_stats order by a nulls last limit 1;
+----
+2.5
+
+query R
+select var(a) over (order by g, a nulls last rows between 1 preceding and 1
following)
+from t_decimal_stats
+order by g, a nulls last;
+----
+0.5
+1
+1
+1
+0.5
+NULL
+
+query R
+select var(distinct a) from t_decimal_stats;
+----
+2.5
+
+statement ok
+drop table t_decimal_stats;
+
+# aggregate variance decimal128 single input
+query RRRR
+select var(a), var_pop(a), stddev(a), stddev_pop(a)
+from (values (cast(1 as decimal(10, 2)))) as t(a);
+----
+NULL 0 NULL 0
+
+# aggregate variance decimal128 all nulls
+query RRRR
+select var(a), var_pop(a), stddev(a), stddev_pop(a)
+from (values (cast(null as decimal(10, 2))), (cast(null as decimal(10, 2))))
as t(a);
+----
+NULL NULL NULL NULL
+
+# aggregate variance decimal128 large values
+query RR
+select var(a), var_pop(a) from (
+ values
+ ('100000000000000000000'::decimal(38, 0)),
+ ('100000000000000000001'::decimal(38, 0))
+) as t(a);
+----
+0.5 0.25
+
+# aggregate variance decimal128 exact overflow fallback
+query RRT
+select var_pop(a), stddev_pop(a), arrow_typeof(var_pop(a)) from (
+ values
+ ('99999999999999999999999999999999999999'::decimal(38, 0)),
+ ('99999999999999999999999999999999999999'::decimal(38, 0)),
+ ('99999999999999999999999999999999999999'::decimal(38, 0)),
+ ('99999999999999999999999999999999999999'::decimal(38, 0)),
+ ('99999999999999999999999999999999999999'::decimal(38, 0)),
+ ('99999999999999999999999999999999999999'::decimal(38, 0))
+) as t(a);
+----
+0 0 Float64
+
+# aggregate variance decimal128 fallback preserves local spread
+query R
+select var_pop(a) from (
+ values
+ ('99999999999999999999999999999999999999'::decimal(38, 0)),
+ ('99999999999999999999999999999999999998'::decimal(38, 0)),
+ ('99999999999999999999999999999999999997'::decimal(38, 0)),
+ ('99999999999999999999999999999999999996'::decimal(38, 0)),
+ ('99999999999999999999999999999999999995'::decimal(38, 0)),
+ ('99999999999999999999999999999999999994'::decimal(38, 0))
+) as t(a);
+----
+2.916666666667
Review Comment:
```
> SELECT var_pop(a) FROM (VALUES
('99999999999999999999999999999999999999'::decimal(38,0)),
('99999999999999999999999999999999999998'::decimal(38,0)),
('99999999999999999999999999999999999997'::decimal(38,0)),
('99999999999999999999999999999999999996'::decimal(38,0)),
('99999999999999999999999999999999999995'::decimal(38,0)),
('99999999999999999999999999999999999994'::decimal(38,0))
) t(a);
+--------------------+
| var_pop(t.a) |
+--------------------+
| 2.9166666666666665 |
+--------------------+
1 row(s) fetched.
Elapsed 0.003 seconds.
```
You are right just handling this one case is not worth this much complexity,
I was just trying to check if there was a better way differentiate DF from
others.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]