This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6952d5ed2c Improve explain tree formatting for longer lines / word wrap (#15031)
6952d5ed2c is described below

commit 6952d5ed2cef9fde1aa3f304a36a282f96a43d60
Author: irenjj <[email protected]>
AuthorDate: Sat Mar 8 06:07:48 2025 +0800

    Improve explain tree formatting for longer lines / word wrap (#15031)
    
    * Improve explain tree formatting for longer lines / word wrap
    
    * fix redundant output
    
    * fix previous test error output
    
    * fix
    
    * fix issues
    
    * fix test
---
 datafusion/physical-plan/src/display.rs            |  54 ++++++++-
 .../sqllogictest/test_files/explain_tree.slt       | 122 ++++++++++++++++++++-
 2 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs
index 096b515407..98ba3e1fd9 100644
--- a/datafusion/physical-plan/src/display.rs
+++ b/datafusion/physical-plan/src/display.rs
@@ -890,9 +890,13 @@ impl TreeRenderVisitor<'_, '_> {
                 splits = truncated_splits;
             }
             for split in splits {
-                // TODO: check every line is less than MAX_LINE_RENDER_SIZE.
-                result.push(split);
+                Self::split_string_buffer(&split, result);
             }
+            if result.len() > max_lines {
+                result.truncate(max_lines);
+                result.push("...".to_string());
+            }
+
             requires_padding = true;
             was_inlined = is_inlined;
         }
@@ -945,6 +949,52 @@ impl TreeRenderVisitor<'_, '_> {
 
         false
     }
+
+    /// Word-wraps `source` and appends the resulting pieces to `result`.
+    ///
+    /// A new piece is started whenever appending the next character would make
+    /// the current piece wider than `Self::NODE_RENDER_WIDTH - 2`. The break is
+    /// placed at the most recent character accepted by
+    /// `Self::can_split_on_this_char`; when that position lies within the first
+    /// 8 characters of the current piece, the break is placed at the current
+    /// character instead so that very short pieces are avoided. The character a
+    /// break is placed on becomes the first character of the next piece.
+    ///
+    /// All positions are character indices and every piece is built from the
+    /// collected `char`s, so input containing multi-byte UTF-8 characters is
+    /// never sliced at a byte offset (which could cut at the wrong place or
+    /// panic on a non-char-boundary).
+    fn split_string_buffer(source: &str, result: &mut Vec<String>) {
+        let mut character_pos = 0;
+        let mut start_pos = 0;
+        let mut render_width = 0;
+        let mut last_possible_split = 0;
+
+        let chars: Vec<char> = source.chars().collect();
+
+        while character_pos < chars.len() {
+            // Treating each char as width 1 for simplification
+            let char_width = 1;
+
+            // Does the next character make us exceed the line length?
+            if render_width + char_width > Self::NODE_RENDER_WIDTH - 2 {
+                if start_pos + 8 > last_possible_split {
+                    // The last character we can split on is one of the first 8 characters of the line
+                    // to not create very small lines we instead split on the current character
+                    last_possible_split = character_pos;
+                }
+
+                // Build the piece from `chars`, not from a byte slice of `source`:
+                // the positions tracked here are character indices.
+                result.push(
+                    chars[start_pos..last_possible_split]
+                        .iter()
+                        .collect::<String>(),
+                );
+                render_width = character_pos - last_possible_split;
+                start_pos = last_possible_split;
+                character_pos = last_possible_split;
+            }
+
+            // check if we can split on this character
+            if Self::can_split_on_this_char(chars[character_pos]) {
+                last_possible_split = character_pos;
+            }
+
+            character_pos += 1;
+            render_width += char_width;
+        }
+
+        if chars.len() > start_pos {
+            // append the remainder of the input
+            result.push(chars[start_pos..].iter().collect::<String>());
+        }
+    }
+
+    /// Returns `true` when a line may be broken on `c`, i.e. when `c` is
+    /// anything other than an ASCII letter, an ASCII digit, or `_`.
+    fn can_split_on_this_char(c: char) -> bool {
+        let is_word_char = c.is_ascii_alphanumeric() || c == '_';
+        !is_word_char
+    }
 }
 
 /// Trait for types which could have additional details when formatted in `Verbose` mode
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 9d50b9bd62..9659bdae19 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -245,7 +245,97 @@ physical_plan
 05)│         FilterExec        │
 06)│    --------------------   │
 07)│         predicate:        │
-08)│string_col@1 != foo AND ...│
+08)│  string_col@1 != foo AND  │
+09)│     string_col@1 != bar   │
+10)│    AND string_col@1 != a  │
+11)│     really long string    │
+12)│          constant         │
+13)└─────────────┬─────────────┘
+14)┌─────────────┴─────────────┐
+15)│      RepartitionExec      │
+16)└─────────────┬─────────────┘
+17)┌─────────────┴─────────────┐
+18)│       DataSourceExec      │
+19)│    --------------------   │
+20)│          files: 1         │
+21)│        format: csv        │
+22)└───────────────────────────┘
+
+# Check maximum line limit.
+query TT
+explain SELECT int_col FROM table1
+WHERE string_col != 
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+----
+logical_plan
+01)Projection: table1.int_col
+02)--Filter: table1.string_col != 
Utf8("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+03)----TableScan: table1 projection=[int_col, string_col], 
partial_filters=[table1.string_col != 
Utf8("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
+physical_plan
+01)┌───────────────────────────┐
+02)│    CoalesceBatchesExec    │
+03)└─────────────┬─────────────┘
+04)┌─────────────┴─────────────┐
+05)│         FilterExec        │
+06)│    --------------------   │
+07)│         predicate:        │
+08)│      string_col@1 !=      │
+09)│       aaaaaaaaaaaaaa      │
+10)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+11)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+12)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+13)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+14)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+15)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+16)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+17)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+18)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+19)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+20)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+21)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+22)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+23)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+24)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+25)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+26)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+27)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+28)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+29)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+30)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+31)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+32)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+33)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+34)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+35)│aaaaaaaaaaaaaaaaaaaaaaaaaaa│
+36)│            ...            │
+37)└─────────────┬─────────────┘
+38)┌─────────────┴─────────────┐
+39)│      RepartitionExec      │
+40)└─────────────┬─────────────┘
+41)┌─────────────┴─────────────┐
+42)│       DataSourceExec      │
+43)│    --------------------   │
+44)│          files: 1         │
+45)│        format: csv        │
+46)└───────────────────────────┘
+
+# Check exactly the render width.
+query TT
+explain SELECT int_col FROM table1
+WHERE string_col != 'aaaaaaaaaaa';
+----
+logical_plan
+01)Projection: table1.int_col
+02)--Filter: table1.string_col != Utf8("aaaaaaaaaaa")
+03)----TableScan: table1 projection=[int_col, string_col], partial_filters=[table1.string_col != Utf8("aaaaaaaaaaa")]
+physical_plan
+01)┌───────────────────────────┐
+02)│    CoalesceBatchesExec    │
+03)└─────────────┬─────────────┘
+04)┌─────────────┴─────────────┐
+05)│         FilterExec        │
+06)│    --------------------   │
+07)│         predicate:        │
+08)│string_col@1 != aaaaaaaaaaa│
 09)└─────────────┬─────────────┘
 10)┌─────────────┴─────────────┐
 11)│      RepartitionExec      │
@@ -257,6 +347,36 @@ physical_plan
 17)│        format: csv        │
 18)└───────────────────────────┘
 
+# Check with the render width + 1.
+query TT
+explain SELECT int_col FROM table1
+WHERE string_col != 'aaaaaaaaaaaa';
+----
+logical_plan
+01)Projection: table1.int_col
+02)--Filter: table1.string_col != Utf8("aaaaaaaaaaaa")
+03)----TableScan: table1 projection=[int_col, string_col], partial_filters=[table1.string_col != Utf8("aaaaaaaaaaaa")]
+physical_plan
+01)┌───────────────────────────┐
+02)│    CoalesceBatchesExec    │
+03)└─────────────┬─────────────┘
+04)┌─────────────┴─────────────┐
+05)│         FilterExec        │
+06)│    --------------------   │
+07)│         predicate:        │
+08)│      string_col@1 !=      │
+09)│        aaaaaaaaaaaa       │
+10)└─────────────┬─────────────┘
+11)┌─────────────┴─────────────┐
+12)│      RepartitionExec      │
+13)└─────────────┬─────────────┘
+14)┌─────────────┴─────────────┐
+15)│       DataSourceExec      │
+16)│    --------------------   │
+17)│          files: 1         │
+18)│        format: csv        │
+19)└───────────────────────────┘
+
 # Query with filter on csv
 query TT
 explain SELECT int_col FROM table1 WHERE string_col != 'foo';


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to