alamb commented on code in PR #8266:
URL: https://github.com/apache/arrow-datafusion/pull/8266#discussion_r1402140424
##########
datafusion/optimizer/src/single_distinct_to_groupby.rs:
##########
@@ -64,22 +67,32 @@ fn is_single_distinct_agg(plan: &LogicalPlan) ->
Result<bool> {
match plan {
LogicalPlan::Aggregate(Aggregate { aggr_expr, .. }) => {
let mut fields_set = HashSet::new();
- let mut distinct_count = 0;
+ let mut aggregate_count = 0;
for expr in aggr_expr {
if let Expr::AggregateFunction(AggregateFunction {
- distinct, args, ..
+ fun,
+ distinct,
+ args,
+ filter,
+ ..
}) = expr
{
- if *distinct {
- distinct_count += 1;
- }
- for e in args {
- fields_set.insert(e.canonical_name());
+ match filter {
+ Some(_) => return Ok(false),
+ None => {
+ aggregate_count += 1;
+ if *distinct {
+ for e in args {
+ fields_set.insert(e.canonical_name());
+ }
+ } else if !matches!(fun, Sum | Min | Max) {
Review Comment:
We could also do crazier stuff for `AVG` like
```sql
SELECT a, COUNT(DISTINCT b), AVG(c)
FROM t
GROUP BY a
```
which could be rewritten to:
```sql
SELECT a, COUNT(alias1), SUM(alias2) / SUM(alias3) -- <-- This is combining
                                                   -- partial sums / counts to compute AVG
FROM (
SELECT a, b as alias1, SUM(c) as alias2, COUNT(c) as alias3
FROM t
GROUP BY a, b
)
GROUP BY a
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]