This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6952d5ed2c Improve explain tree formatting for longer lines / word wrap (#15031)
6952d5ed2c is described below
commit 6952d5ed2cef9fde1aa3f304a36a282f96a43d60
Author: irenjj <[email protected]>
AuthorDate: Sat Mar 8 06:07:48 2025 +0800
Improve explain tree formatting for longer lines / word wrap (#15031)
* Improve explain tree formatting for longer lines / word wrap
* fix redundant output
* fix previous test error output
* fix
* fix issues
* fix test
---
datafusion/physical-plan/src/display.rs | 54 ++++++++-
.../sqllogictest/test_files/explain_tree.slt | 122 ++++++++++++++++++++-
2 files changed, 173 insertions(+), 3 deletions(-)
diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs
index 096b515407..98ba3e1fd9 100644
--- a/datafusion/physical-plan/src/display.rs
+++ b/datafusion/physical-plan/src/display.rs
@@ -890,9 +890,13 @@ impl TreeRenderVisitor<'_, '_> {
splits = truncated_splits;
}
for split in splits {
- // TODO: check every line is less than MAX_LINE_RENDER_SIZE.
- result.push(split);
+ Self::split_string_buffer(&split, result);
}
+ if result.len() > max_lines {
+ result.truncate(max_lines);
+ result.push("...".to_string());
+ }
+
requires_padding = true;
was_inlined = is_inlined;
}
@@ -945,6 +949,52 @@ impl TreeRenderVisitor<'_, '_> {
false
}
+
+ fn split_string_buffer(source: &str, result: &mut Vec<String>) {
+ let mut character_pos = 0;
+ let mut start_pos = 0;
+ let mut render_width = 0;
+ let mut last_possible_split = 0;
+
+ let chars: Vec<char> = source.chars().collect();
+
+ while character_pos < chars.len() {
+ // Treating each char as width 1 for simplification
+ let char_width = 1;
+
+ // Does the next character make us exceed the line length?
+ if render_width + char_width > Self::NODE_RENDER_WIDTH - 2 {
+ if start_pos + 8 > last_possible_split {
+ // The last character we can split on is one of the first 8 characters of the line
+ // to not create very small lines we instead split on the current character
+ last_possible_split = character_pos;
+ }
+
+ result.push(source[start_pos..last_possible_split].to_string());
+ render_width = character_pos - last_possible_split;
+ start_pos = last_possible_split;
+ character_pos = last_possible_split;
+ }
+
+ // check if we can split on this character
+ if Self::can_split_on_this_char(chars[character_pos]) {
+ last_possible_split = character_pos;
+ }
+
+ character_pos += 1;
+ render_width += char_width;
+ }
+
+ if source.len() > start_pos {
+ // append the remainder of the input
+ result.push(source[start_pos..].to_string());
+ }
+ }
+
+ fn can_split_on_this_char(c: char) -> bool {
+ (!c.is_ascii_digit() && !c.is_ascii_uppercase() && !c.is_ascii_lowercase())
+ && c != '_'
+ }
}
/// Trait for types which could have additional details when formatted in `Verbose` mode
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 9d50b9bd62..9659bdae19 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -245,7 +245,97 @@ physical_plan
05)│ FilterExec │
06)│ -------------------- │
07)│ predicate: │
-08)│string_col@1 != foo AND ...│
+08)│ string_col@1 != foo AND │
+09)│ string_col@1 != bar │
+10)│ AND string_col@1 != a │
+11)│ really long string │
+12)│ constant │
+13)└─────────────┬─────────────┘
+14)┌─────────────┴─────────────┐
+15)│ RepartitionExec │
+16)└─────────────┬─────────────┘
+17)┌─────────────┴─────────────┐
+18)│ DataSourceExec │
+19)│ -------------------- │
+20)│ files: 1 │
+21)│ format: csv │
+22)└───────────────────────────┘
+
+# Check maximum line limit.
+query TT
+explain SELECT int_col FROM table1
+WHERE string_col !=
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+----
+logical_plan
+01)Projection: table1.int_col
+02)--Filter: table1.string_col !=
Utf8("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+03)----TableScan: table1 projection=[int_col, string_col],
partial_filters=[table1.string_col !=
Utf8("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+physical_plan
+01)┌───────────────────────────┐
+02)│ CoalesceBatchesExec │
+03)└─────────────┬─────────────┘
+04)┌─────────────┴─────────────┐
+05)│ FilterExec │
+06)│ -------------------- │
+07)│ predicate: │
+08)│ string_col@1 != │
+09)│ aaaaaaaaaaaaaa │
+10)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+11)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+12)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+13)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+14)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+15)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+16)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+17)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+18)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+19)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+20)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+21)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+22)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+23)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+24)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+25)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+26)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+27)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+28)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+29)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+30)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+31)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+32)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+33)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+34)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+35)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+36)│ ... │
+37)└─────────────┬─────────────┘
+38)┌─────────────┴─────────────┐
+39)│ RepartitionExec │
+40)└─────────────┬─────────────┘
+41)┌─────────────┴─────────────┐
+42)│ DataSourceExec │
+43)│ -------------------- │
+44)│ files: 1 │
+45)│ format: csv │
+46)└───────────────────────────┘
+
+# Check exactly the render width.
+query TT
+explain SELECT int_col FROM table1
+WHERE string_col != 'aaaaaaaaaaa';
+----
+logical_plan
+01)Projection: table1.int_col
+02)--Filter: table1.string_col != Utf8("aaaaaaaaaaa")
+03)----TableScan: table1 projection=[int_col, string_col], partial_filters=[table1.string_col != Utf8("aaaaaaaaaaa")]
+physical_plan
+01)┌───────────────────────────┐
+02)│ CoalesceBatchesExec │
+03)└─────────────┬─────────────┘
+04)┌─────────────┴─────────────┐
+05)│ FilterExec │
+06)│ -------------------- │
+07)│ predicate: │
+08)│string_col@1 != aaaaaaaaaaa│
09)└─────────────┬─────────────┘
10)┌─────────────┴─────────────┐
11)│ RepartitionExec │
@@ -257,6 +347,36 @@ physical_plan
17)│ format: csv │
18)└───────────────────────────┘
+# Check with the render width + 1.
+query TT
+explain SELECT int_col FROM table1
+WHERE string_col != 'aaaaaaaaaaaa';
+----
+logical_plan
+01)Projection: table1.int_col
+02)--Filter: table1.string_col != Utf8("aaaaaaaaaaaa")
+03)----TableScan: table1 projection=[int_col, string_col], partial_filters=[table1.string_col != Utf8("aaaaaaaaaaaa")]
+physical_plan
+01)┌───────────────────────────┐
+02)│ CoalesceBatchesExec │
+03)└─────────────┬─────────────┘
+04)┌─────────────┴─────────────┐
+05)│ FilterExec │
+06)│ -------------------- │
+07)│ predicate: │
+08)│ string_col@1 != │
+09)│ aaaaaaaaaaaa │
+10)└─────────────┬─────────────┘
+11)┌─────────────┴─────────────┐
+12)│ RepartitionExec │
+13)└─────────────┬─────────────┘
+14)┌─────────────┴─────────────┐
+15)│ DataSourceExec │
+16)│ -------------------- │
+17)│ files: 1 │
+18)│ format: csv │
+19)└───────────────────────────┘
+
# Query with filter on csv
query TT
explain SELECT int_col FROM table1 WHERE string_col != 'foo';
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]