This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0ea9bc659c feat(CLI): print column headers for empty query results (#10300)
0ea9bc659c is described below

commit 0ea9bc659cbcdd1d70cf8ea8c284634d65171c33
Author: Jonah Gao <[email protected]>
AuthorDate: Tue Apr 30 02:51:32 2024 +0800

    feat(CLI): print column headers for empty query results (#10300)
    
    * feat(CLI): print column headers for empty query results
    
    * Narrow now()'s scope
    
    * retry ci
---
 datafusion-cli/src/command.rs       |  18 ++---
 datafusion-cli/src/exec.rs          |   5 +-
 datafusion-cli/src/print_format.rs  | 130 +++++++++++++++++++++++++++++-------
 datafusion-cli/src/print_options.rs |   7 +-
 4 files changed, 122 insertions(+), 38 deletions(-)

diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs
index d3d7b65f0a..be6393351a 100644
--- a/datafusion-cli/src/command.rs
+++ b/datafusion-cli/src/command.rs
@@ -17,7 +17,7 @@
 
 //! Command within CLI
 
-use crate::exec::exec_from_lines;
+use crate::exec::{exec_and_print, exec_from_lines};
 use crate::functions::{display_all_functions, Function};
 use crate::print_format::PrintFormat;
 use crate::print_options::PrintOptions;
@@ -58,18 +58,18 @@ impl Command {
         ctx: &mut SessionContext,
         print_options: &mut PrintOptions,
     ) -> Result<()> {
-        let now = Instant::now();
         match self {
-            Self::Help => print_options.print_batches(&[all_commands_info()], now),
+            Self::Help => {
+                let now = Instant::now();
+                let command_batch = all_commands_info();
+                print_options.print_batches(command_batch.schema(), &[command_batch], now)
+            }
             Self::ListTables => {
-                let df = ctx.sql("SHOW TABLES").await?;
-                let batches = df.collect().await?;
-                print_options.print_batches(&batches, now)
+                exec_and_print(ctx, print_options, "SHOW TABLES".into()).await
             }
             Self::DescribeTableStmt(name) => {
-                let df = ctx.sql(&format!("SHOW COLUMNS FROM {}", name)).await?;
-                let batches = df.collect().await?;
-                print_options.print_batches(&batches, now)
+                exec_and_print(ctx, print_options, format!("SHOW COLUMNS FROM {}", name))
+                    .await
             }
             Self::Include(filename) => {
                 if let Some(filename) = filename {
diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs
index 5fbcea0c06..19bff0528b 100644
--- a/datafusion-cli/src/exec.rs
+++ b/datafusion-cli/src/exec.rs
@@ -203,7 +203,7 @@ pub async fn exec_from_repl(
     rl.save_history(".history")
 }
 
-async fn exec_and_print(
+pub(super) async fn exec_and_print(
     ctx: &mut SessionContext,
     print_options: &PrintOptions,
     sql: String,
@@ -235,8 +235,9 @@ async fn exec_and_print(
             let stream = execute_stream(physical_plan, task_ctx.clone())?;
             print_options.print_stream(stream, now).await?;
         } else {
+            let schema = physical_plan.schema();
             let results = collect(physical_plan, task_ctx.clone()).await?;
-            adjusted.into_inner().print_batches(&results, now)?;
+            adjusted.into_inner().print_batches(schema, &results, now)?;
         }
     }
 
diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs
index 2de52be612..c95bde7fc6 100644
--- a/datafusion-cli/src/print_format.rs
+++ b/datafusion-cli/src/print_format.rs
@@ -22,6 +22,7 @@ use std::str::FromStr;
 use crate::print_options::MaxRows;
 
 use arrow::csv::writer::WriterBuilder;
+use arrow::datatypes::SchemaRef;
 use arrow::json::{ArrayWriter, LineDelimitedWriter};
 use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::pretty_format_batches_with_options;
@@ -157,6 +158,7 @@ impl PrintFormat {
     pub fn print_batches<W: std::io::Write>(
         &self,
         writer: &mut W,
+        schema: SchemaRef,
         batches: &[RecordBatch],
         maxrows: MaxRows,
         with_header: bool,
@@ -168,7 +170,7 @@ impl PrintFormat {
             .cloned()
             .collect();
         if batches.is_empty() {
-            return Ok(());
+            return self.print_empty(writer, schema);
         }
 
         match self {
@@ -186,6 +188,27 @@ impl PrintFormat {
             Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, &batches),
         }
     }
+
+    /// Print when the result batches contain no rows
+    fn print_empty<W: std::io::Write>(
+        &self,
+        writer: &mut W,
+        schema: SchemaRef,
+    ) -> Result<()> {
+        match self {
+            // Print column headers for Table format
+            Self::Table if !schema.fields().is_empty() => {
+                let empty_batch = RecordBatch::new_empty(schema);
+                let formatted = pretty_format_batches_with_options(
+                    &[empty_batch],
+                    &DEFAULT_FORMAT_OPTIONS,
+                )?;
+                writeln!(writer, "{}", formatted)?;
+            }
+            _ => {}
+        }
+        Ok(())
+    }
 }
 
 #[cfg(test)]
@@ -193,7 +216,7 @@ mod tests {
     use super::*;
     use std::sync::Arc;
 
-    use arrow::array::{ArrayRef, Int32Array};
+    use arrow::array::Int32Array;
     use arrow::datatypes::{DataType, Field, Schema};
 
     #[test]
@@ -201,7 +224,6 @@ mod tests {
         for format in [
             PrintFormat::Csv,
             PrintFormat::Tsv,
-            PrintFormat::Table,
             PrintFormat::Json,
             PrintFormat::NdJson,
             PrintFormat::Automatic,
@@ -209,10 +231,26 @@ mod tests {
             // no output for empty batches, even with header set
             PrintBatchesTest::new()
                 .with_format(format)
+                .with_schema(three_column_schema())
                 .with_batches(vec![])
                 .with_expected(&[""])
                 .run();
         }
+
+        // output column headers for empty batches when format is Table
+        #[rustfmt::skip]
+        let expected = &[
+            "+---+---+---+",
+            "| a | b | c |",
+            "+---+---+---+",
+            "+---+---+---+",
+        ];
+        PrintBatchesTest::new()
+            .with_format(PrintFormat::Table)
+            .with_schema(three_column_schema())
+            .with_batches(vec![])
+            .with_expected(expected)
+            .run();
     }
 
     #[test]
@@ -385,6 +423,7 @@ mod tests {
         for max_rows in [MaxRows::Unlimited, MaxRows::Limited(5), MaxRows::Limited(3)] {
             PrintBatchesTest::new()
                 .with_format(PrintFormat::Table)
+                .with_schema(one_column_schema())
                 .with_batches(vec![one_column_batch()])
                 .with_maxrows(max_rows)
                 .with_expected(expected)
@@ -450,15 +489,15 @@ mod tests {
         let empty_batch = RecordBatch::new_empty(batch.schema());
 
         #[rustfmt::skip]
-            let expected =&[
-                "+---+",
-                "| a |",
-                "+---+",
-                "| 1 |",
-                "| 2 |",
-                "| 3 |",
-                "+---+",
-            ];
+        let expected =&[
+            "+---+",
+            "| a |",
+            "+---+",
+            "| 1 |",
+            "| 2 |",
+            "| 3 |",
+            "+---+",
+        ];
 
         PrintBatchesTest::new()
             .with_format(PrintFormat::Table)
@@ -468,14 +507,32 @@ mod tests {
     }
 
     #[test]
-    fn test_print_batches_empty_batches_no_header() {
+    fn test_print_batches_empty_batch() {
         let empty_batch = RecordBatch::new_empty(one_column_batch().schema());
 
-        // empty batches should not print a header
-        let expected = &[""];
+        // Print column headers for empty batch when format is Table
+        #[rustfmt::skip]
+        let expected =&[
+            "+---+",
+            "| a |",
+            "+---+",
+            "+---+",
+        ];
+
+        PrintBatchesTest::new()
+            .with_format(PrintFormat::Table)
+            .with_schema(one_column_schema())
+            .with_batches(vec![empty_batch])
+            .with_header(WithHeader::Yes)
+            .with_expected(expected)
+            .run();
 
+        // No output for empty batch when schema contains no columns
+        let empty_batch = RecordBatch::new_empty(Arc::new(Schema::empty()));
+        let expected = &[""];
         PrintBatchesTest::new()
             .with_format(PrintFormat::Table)
+            .with_schema(Arc::new(Schema::empty()))
             .with_batches(vec![empty_batch])
             .with_header(WithHeader::Yes)
             .with_expected(expected)
@@ -485,6 +542,7 @@ mod tests {
     #[derive(Debug)]
     struct PrintBatchesTest {
         format: PrintFormat,
+        schema: SchemaRef,
         batches: Vec<RecordBatch>,
         maxrows: MaxRows,
         with_header: WithHeader,
@@ -504,6 +562,7 @@ mod tests {
         fn new() -> Self {
             Self {
                 format: PrintFormat::Table,
+                schema: Arc::new(Schema::empty()),
                 batches: vec![],
                 maxrows: MaxRows::Unlimited,
                 with_header: WithHeader::Ignored,
@@ -517,6 +576,12 @@ mod tests {
             self
         }
 
+        // set the schema
+        fn with_schema(mut self, schema: SchemaRef) -> Self {
+            self.schema = schema;
+            self
+        }
+
         /// set the batches to convert
         fn with_batches(mut self, batches: Vec<RecordBatch>) -> Self {
             self.batches = batches;
@@ -573,21 +638,31 @@ mod tests {
         fn output_with_header(&self, with_header: bool) -> String {
             let mut buffer: Vec<u8> = vec![];
             self.format
-                .print_batches(&mut buffer, &self.batches, self.maxrows, with_header)
+                .print_batches(
+                    &mut buffer,
+                    self.schema.clone(),
+                    &self.batches,
+                    self.maxrows,
+                    with_header,
+                )
                 .unwrap();
             String::from_utf8(buffer).unwrap()
         }
     }
 
-    /// Return a batch with three columns and three rows
-    fn three_column_batch() -> RecordBatch {
-        let schema = Arc::new(Schema::new(vec![
+    /// Return a schema with three columns
+    fn three_column_schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
             Field::new("a", DataType::Int32, false),
             Field::new("b", DataType::Int32, false),
             Field::new("c", DataType::Int32, false),
-        ]));
+        ]))
+    }
+
+    /// Return a batch with three columns and three rows
+    fn three_column_batch() -> RecordBatch {
         RecordBatch::try_new(
-            schema,
+            three_column_schema(),
             vec![
                 Arc::new(Int32Array::from(vec![1, 2, 3])),
                 Arc::new(Int32Array::from(vec![4, 5, 6])),
@@ -597,12 +672,17 @@ mod tests {
         .unwrap()
     }
 
+    /// Return a schema with one column
+    fn one_column_schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]))
+    }
+
     /// return a batch with one column and three rows
     fn one_column_batch() -> RecordBatch {
-        RecordBatch::try_from_iter(vec![(
-            "a",
-            Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
-        )])
+        RecordBatch::try_new(
+            one_column_schema(),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
         .unwrap()
     }
 
diff --git a/datafusion-cli/src/print_options.rs b/datafusion-cli/src/print_options.rs
index bede5dd15e..e80cc55663 100644
--- a/datafusion-cli/src/print_options.rs
+++ b/datafusion-cli/src/print_options.rs
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use datafusion::common::instant::Instant;
 use std::fmt::{Display, Formatter};
 use std::io::Write;
 use std::pin::Pin;
@@ -23,7 +22,9 @@ use std::str::FromStr;
 
 use crate::print_format::PrintFormat;
 
+use arrow::datatypes::SchemaRef;
 use arrow::record_batch::RecordBatch;
+use datafusion::common::instant::Instant;
 use datafusion::common::DataFusionError;
 use datafusion::error::Result;
 use datafusion::physical_plan::RecordBatchStream;
@@ -98,6 +99,7 @@ impl PrintOptions {
     /// Print the batches to stdout using the specified format
     pub fn print_batches(
         &self,
+        schema: SchemaRef,
         batches: &[RecordBatch],
         query_start_time: Instant,
     ) -> Result<()> {
@@ -105,7 +107,7 @@ impl PrintOptions {
         let mut writer = stdout.lock();
 
         self.format
-            .print_batches(&mut writer, batches, self.maxrows, true)?;
+            .print_batches(&mut writer, schema, batches, self.maxrows, true)?;
 
         let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
         let formatted_exec_details = get_execution_details_formatted(
@@ -148,6 +150,7 @@ impl PrintOptions {
             row_count += batch.num_rows();
             self.format.print_batches(
                 &mut writer,
+                batch.schema(),
                 &[batch],
                 MaxRows::Unlimited,
                 with_header,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to