This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 411185ff5e Implement tree explain for AggregateExec (#15103)
411185ff5e is described below
commit 411185ff5e9aa319a4233e40184c8a55607d8f54
Author: zeb <[email protected]>
AuthorDate: Wed Mar 12 03:39:51 2025 +0800
Implement tree explain for AggregateExec (#15103)
* Implement tree explain for AggregateExec
* Extract expr formatting logic for readability
* fix empty group_by display
---
datafusion/physical-plan/src/aggregates/mod.rs | 77 +++++++++++++++-------
.../sqllogictest/test_files/explain_tree.slt | 77 ++++++++++++++--------
2 files changed, 103 insertions(+), 51 deletions(-)
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index 7d4837d047..5dccc09fc7 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -742,6 +742,15 @@ impl DisplayAs for AggregateExec {
t: DisplayFormatType,
f: &mut std::fmt::Formatter,
) -> std::fmt::Result {
+ let format_expr_with_alias =
+ |(e, alias): &(Arc<dyn PhysicalExpr>, String)| -> String {
+ let e = e.to_string();
+ if &e != alias {
+ format!("{e} as {alias}")
+ } else {
+ e
+ }
+ };
match t {
DisplayFormatType::Default | DisplayFormatType::Verbose => {
write!(f, "AggregateExec: mode={:?}", self.mode)?;
@@ -749,14 +758,7 @@ impl DisplayAs for AggregateExec {
self.group_by
.expr
.iter()
- .map(|(e, alias)| {
- let e = e.to_string();
- if &e != alias {
- format!("{e} as {alias}")
- } else {
- e
- }
- })
+ .map(format_expr_with_alias)
.collect()
} else {
self.group_by
@@ -768,21 +770,11 @@ impl DisplayAs for AggregateExec {
.enumerate()
.map(|(idx, is_null)| {
if *is_null {
- let (e, alias) = &self.group_by.null_expr[idx];
- let e = e.to_string();
- if &e != alias {
- format!("{e} as {alias}")
- } else {
- e
- }
+ format_expr_with_alias(
+ &self.group_by.null_expr[idx],
+ )
} else {
- let (e, alias) = &self.group_by.expr[idx];
- let e = e.to_string();
- if &e != alias {
- format!("{e} as {alias}")
- } else {
- e
- }
+ format_expr_with_alias(&self.group_by.expr[idx])
}
})
.collect::<Vec<String>>()
@@ -809,8 +801,45 @@ impl DisplayAs for AggregateExec {
}
}
DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")?;
+ let g: Vec<String> = if self.group_by.is_single() {
+ self.group_by
+ .expr
+ .iter()
+ .map(format_expr_with_alias)
+ .collect()
+ } else {
+ self.group_by
+ .groups
+ .iter()
+ .map(|group| {
+ let terms = group
+ .iter()
+ .enumerate()
+ .map(|(idx, is_null)| {
+ if *is_null {
+ format_expr_with_alias(
+ &self.group_by.null_expr[idx],
+ )
+ } else {
+ format_expr_with_alias(&self.group_by.expr[idx])
+ }
+ })
+ .collect::<Vec<String>>()
+ .join(", ");
+ format!("({terms})")
+ })
+ .collect()
+ };
+ let a: Vec<String> = self
+ .aggr_expr
+ .iter()
+ .map(|agg| agg.name().to_string())
+ .collect();
+ writeln!(f, "mode={:?}", self.mode)?;
+ if !g.is_empty() {
+ writeln!(f, "group_by={}", g.join(", "))?;
+ }
+ writeln!(f, "aggr={}", a.join(", "))?;
}
}
Ok(())
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 4470cf9fae..45ddcb9a73 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -166,25 +166,42 @@ explain SELECT string_col, SUM(bigint_col) FROM table1 GROUP BY string_col;
physical_plan
01)┌───────────────────────────┐
02)│ AggregateExec │
-03)└─────────────┬─────────────┘
-04)┌─────────────┴─────────────┐
-05)│ CoalesceBatchesExec │
-06)└─────────────┬─────────────┘
-07)┌─────────────┴─────────────┐
-08)│ RepartitionExec │
-09)└─────────────┬─────────────┘
-10)┌─────────────┴─────────────┐
-11)│ AggregateExec │
+03)│ -------------------- │
+04)│ aggr: │
+05)│ sum(table1.bigint_col) │
+06)│ │
+07)│ group_by: │
+08)│ string_col@0 as string_col│
+09)│ │
+10)│ mode: │
+11)│ FinalPartitioned │
12)└─────────────┬─────────────┘
13)┌─────────────┴─────────────┐
-14)│ RepartitionExec │
+14)│ CoalesceBatchesExec │
15)└─────────────┬─────────────┘
16)┌─────────────┴─────────────┐
-17)│ DataSourceExec │
-18)│ -------------------- │
-19)│ files: 1 │
-20)│ format: csv │
-21)└───────────────────────────┘
+17)│ RepartitionExec │
+18)└─────────────┬─────────────┘
+19)┌─────────────┴─────────────┐
+20)│ AggregateExec │
+21)│ -------------------- │
+22)│ aggr: │
+23)│ sum(table1.bigint_col) │
+24)│ │
+25)│ group_by: │
+26)│ string_col@0 as string_col│
+27)│ │
+28)│ mode: Partial │
+29)└─────────────┬─────────────┘
+30)┌─────────────┴─────────────┐
+31)│ RepartitionExec │
+32)└─────────────┬─────────────┘
+33)┌─────────────┴─────────────┐
+34)│ DataSourceExec │
+35)│ -------------------- │
+36)│ files: 1 │
+37)│ format: csv │
+38)└───────────────────────────┘
# Limit
query TT
@@ -1076,22 +1093,28 @@ physical_plan
11)└───────────────────────────┘└─────────────┬─────────────┘
12)-----------------------------┌─────────────┴─────────────┐
13)-----------------------------│ AggregateExec │
-14)-----------------------------└─────────────┬─────────────┘
-15)-----------------------------┌─────────────┴─────────────┐
-16)-----------------------------│ CoalescePartitionsExec │
+14)-----------------------------│ -------------------- │
+15)-----------------------------│ aggr: count(Int64(1)) │
+16)-----------------------------│ mode: Final │
17)-----------------------------└─────────────┬─────────────┘
18)-----------------------------┌─────────────┴─────────────┐
-19)-----------------------------│ AggregateExec │
+19)-----------------------------│ CoalescePartitionsExec │
20)-----------------------------└─────────────┬─────────────┘
21)-----------------------------┌─────────────┴─────────────┐
-22)-----------------------------│ RepartitionExec │
-23)-----------------------------└─────────────┬─────────────┘
-24)-----------------------------┌─────────────┴─────────────┐
-25)-----------------------------│ DataSourceExec │
-26)-----------------------------│ -------------------- │
-27)-----------------------------│ files: 1 │
-28)-----------------------------│ format: parquet │
-29)-----------------------------└───────────────────────────┘
+22)-----------------------------│ AggregateExec │
+23)-----------------------------│ -------------------- │
+24)-----------------------------│ aggr: count(Int64(1)) │
+25)-----------------------------│ mode: Partial │
+26)-----------------------------└─────────────┬─────────────┘
+27)-----------------------------┌─────────────┴─────────────┐
+28)-----------------------------│ RepartitionExec │
+29)-----------------------------└─────────────┬─────────────┘
+30)-----------------------------┌─────────────┴─────────────┐
+31)-----------------------------│ DataSourceExec │
+32)-----------------------------│ -------------------- │
+33)-----------------------------│ files: 1 │
+34)-----------------------------│ format: parquet │
+35)-----------------------------└───────────────────────────┘
# Query with cross join.
query TT
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]