This is an automated email from the ASF dual-hosted git repository.

mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ba50a8b178 Minor: provide default implementation for 
ExecutionPlan::statistics (#7911)
ba50a8b178 is described below

commit ba50a8b178eece7e79b100d0b73bdc9d6d3ec6d5
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Oct 24 07:02:49 2023 -0400

    Minor: provide default implementation for ExecutionPlan::statistics (#7911)
    
    * Minor: provide default implementation for ExecutionPlan::statistics
    
    * fix: update statistics
---
 datafusion-examples/examples/custom_datasource.rs |  6 +-----
 datafusion/core/src/physical_planner.rs           |  8 +-------
 datafusion/core/src/test_util/mod.rs              |  6 +-----
 datafusion/physical-plan/src/analyze.rs           |  7 +------
 datafusion/physical-plan/src/explain.rs           |  7 +------
 datafusion/physical-plan/src/insert.rs            |  5 -----
 datafusion/physical-plan/src/lib.rs               |  8 ++++++--
 datafusion/physical-plan/src/streaming.rs         |  6 +-----
 datafusion/physical-plan/src/test/exec.rs         | 12 ------------
 datafusion/physical-plan/src/unnest.rs            |  6 +-----
 10 files changed, 13 insertions(+), 58 deletions(-)

diff --git a/datafusion-examples/examples/custom_datasource.rs 
b/datafusion-examples/examples/custom_datasource.rs
index dd36665a93..9f25a0b2fa 100644
--- a/datafusion-examples/examples/custom_datasource.rs
+++ b/datafusion-examples/examples/custom_datasource.rs
@@ -32,7 +32,7 @@ use datafusion::physical_plan::expressions::PhysicalSortExpr;
 use datafusion::physical_plan::memory::MemoryStream;
 use datafusion::physical_plan::{
     project_schema, DisplayAs, DisplayFormatType, ExecutionPlan,
-    SendableRecordBatchStream, Statistics,
+    SendableRecordBatchStream,
 };
 use datafusion::prelude::*;
 use datafusion_expr::{Expr, LogicalPlanBuilder};
@@ -270,8 +270,4 @@ impl ExecutionPlan for CustomExec {
             None,
         )?))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
diff --git a/datafusion/core/src/physical_planner.rs 
b/datafusion/core/src/physical_planner.rs
index 5a1fdcaee5..419f62cff6 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -2057,9 +2057,7 @@ mod tests {
     use super::*;
     use crate::datasource::file_format::options::CsvReadOptions;
     use crate::datasource::MemTable;
-    use crate::physical_plan::{
-        expressions, DisplayFormatType, Partitioning, Statistics,
-    };
+    use crate::physical_plan::{expressions, DisplayFormatType, Partitioning};
     use crate::physical_plan::{DisplayAs, SendableRecordBatchStream};
     use crate::physical_planner::PhysicalPlanner;
     use crate::prelude::{SessionConfig, SessionContext};
@@ -2670,10 +2668,6 @@ mod tests {
         ) -> Result<SendableRecordBatchStream> {
             unimplemented!("NoOpExecutionPlan::execute");
         }
-
-        fn statistics(&self) -> Result<Statistics> {
-            unimplemented!("NoOpExecutionPlan::statistics");
-        }
     }
 
     //  Produces an execution plan where the schema is mismatched from
diff --git a/datafusion/core/src/test_util/mod.rs 
b/datafusion/core/src/test_util/mod.rs
index d826ec8bfb..4fe022f176 100644
--- a/datafusion/core/src/test_util/mod.rs
+++ b/datafusion/core/src/test_util/mod.rs
@@ -40,7 +40,7 @@ use crate::prelude::{CsvReadOptions, SessionContext};
 
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
-use datafusion_common::{Statistics, TableReference};
+use datafusion_common::TableReference;
 use datafusion_expr::{CreateExternalTable, Expr, TableType};
 use datafusion_physical_expr::PhysicalSortExpr;
 
@@ -238,10 +238,6 @@ impl ExecutionPlan for UnboundedExec {
             batch: self.batch.clone(),
         }))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
 
 #[derive(Debug)]
diff --git a/datafusion/physical-plan/src/analyze.rs 
b/datafusion/physical-plan/src/analyze.rs
index bce2425135..ded37983bb 100644
--- a/datafusion/physical-plan/src/analyze.rs
+++ b/datafusion/physical-plan/src/analyze.rs
@@ -25,7 +25,7 @@ use super::stream::{RecordBatchReceiverStream, 
RecordBatchStreamAdapter};
 use super::{DisplayAs, Distribution, SendableRecordBatchStream};
 
 use crate::display::DisplayableExecutionPlan;
-use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics};
+use crate::{DisplayFormatType, ExecutionPlan, Partitioning};
 
 use arrow::{array::StringBuilder, datatypes::SchemaRef, 
record_batch::RecordBatch};
 use datafusion_common::{internal_err, DataFusionError, Result};
@@ -195,11 +195,6 @@ impl ExecutionPlan for AnalyzeExec {
             futures::stream::once(output),
         )))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        // Statistics an an ANALYZE plan are not relevant
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
 
 /// Creates the ouput of AnalyzeExec as a RecordBatch
diff --git a/datafusion/physical-plan/src/explain.rs 
b/datafusion/physical-plan/src/explain.rs
index 81b8f99441..e4904ddd34 100644
--- a/datafusion/physical-plan/src/explain.rs
+++ b/datafusion/physical-plan/src/explain.rs
@@ -23,7 +23,7 @@ use std::sync::Arc;
 use super::expressions::PhysicalSortExpr;
 use super::{DisplayAs, SendableRecordBatchStream};
 use crate::stream::RecordBatchStreamAdapter;
-use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics};
+use crate::{DisplayFormatType, ExecutionPlan, Partitioning};
 
 use arrow::{array::StringBuilder, datatypes::SchemaRef, 
record_batch::RecordBatch};
 use datafusion_common::display::StringifiedPlan;
@@ -167,11 +167,6 @@ impl ExecutionPlan for ExplainExec {
             futures::stream::iter(vec![Ok(record_batch)]),
         )))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        // Statistics an EXPLAIN plan are not relevant
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
 
 /// If this plan should be shown, given the previous plan that was
diff --git a/datafusion/physical-plan/src/insert.rs 
b/datafusion/physical-plan/src/insert.rs
index d1f2706930..627d58e137 100644
--- a/datafusion/physical-plan/src/insert.rs
+++ b/datafusion/physical-plan/src/insert.rs
@@ -25,7 +25,6 @@ use std::sync::Arc;
 use super::expressions::PhysicalSortExpr;
 use super::{
     DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, 
SendableRecordBatchStream,
-    Statistics,
 };
 use crate::metrics::MetricsSet;
 use crate::stream::RecordBatchStreamAdapter;
@@ -276,10 +275,6 @@ impl ExecutionPlan for FileSinkExec {
             stream,
         )))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
 
 /// Create a output record batch with a count
diff --git a/datafusion/physical-plan/src/lib.rs 
b/datafusion/physical-plan/src/lib.rs
index d7987ba95a..b2f81579f8 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -231,8 +231,12 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
         None
     }
 
-    /// Returns the global output statistics for this `ExecutionPlan` node.
-    fn statistics(&self) -> Result<Statistics>;
+    /// Returns statistics for this `ExecutionPlan` node. If statistics are not
+    /// available, should return [`Statistics::new_unknown`] (the default), not
+    /// an error.
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema()))
+    }
 }
 
 /// Indicate whether a data exchange is needed for the input of `plan`, which 
will be very helpful
diff --git a/datafusion/physical-plan/src/streaming.rs 
b/datafusion/physical-plan/src/streaming.rs
index 7bfa7e2cee..27f03b727c 100644
--- a/datafusion/physical-plan/src/streaming.rs
+++ b/datafusion/physical-plan/src/streaming.rs
@@ -26,7 +26,7 @@ use crate::stream::RecordBatchStreamAdapter;
 use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
 
 use arrow::datatypes::SchemaRef;
-use datafusion_common::{internal_err, plan_err, DataFusionError, Result, 
Statistics};
+use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
 use datafusion_execution::TaskContext;
 use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
 
@@ -187,8 +187,4 @@ impl ExecutionPlan for StreamingTableExec {
             None => stream,
         })
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
diff --git a/datafusion/physical-plan/src/test/exec.rs 
b/datafusion/physical-plan/src/test/exec.rs
index f90f4231c6..71e6cba674 100644
--- a/datafusion/physical-plan/src/test/exec.rs
+++ b/datafusion/physical-plan/src/test/exec.rs
@@ -453,10 +453,6 @@ impl ExecutionPlan for ErrorExec {
     ) -> Result<SendableRecordBatchStream> {
         internal_err!("ErrorExec, unsurprisingly, errored in partition 
{partition}")
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
 
 /// A mock execution plan that simply returns the provided statistics
@@ -627,10 +623,6 @@ impl ExecutionPlan for BlockingExec {
             _refs: Arc::clone(&self.refs),
         }))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        unimplemented!()
-    }
 }
 
 /// A [`RecordBatchStream`] that is pending forever.
@@ -764,10 +756,6 @@ impl ExecutionPlan for PanicExec {
             ready: false,
         }))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        unimplemented!()
-    }
 }
 
 /// A [`RecordBatchStream`] that yields every other batch and panics
diff --git a/datafusion/physical-plan/src/unnest.rs 
b/datafusion/physical-plan/src/unnest.rs
index ed64735e5a..30f109953c 100644
--- a/datafusion/physical-plan/src/unnest.rs
+++ b/datafusion/physical-plan/src/unnest.rs
@@ -25,7 +25,7 @@ use super::DisplayAs;
 use crate::{
     expressions::Column, DisplayFormatType, Distribution, 
EquivalenceProperties,
     ExecutionPlan, Partitioning, PhysicalExpr, PhysicalSortExpr, 
RecordBatchStream,
-    SendableRecordBatchStream, Statistics,
+    SendableRecordBatchStream,
 };
 
 use arrow::array::{
@@ -159,10 +159,6 @@ impl ExecutionPlan for UnnestExec {
             unnest_time: 0,
         }))
     }
-
-    fn statistics(&self) -> Result<Statistics> {
-        Ok(Statistics::new_unknown(&self.schema()))
-    }
 }
 
 /// A stream that issues [RecordBatch]es with unnested column data.

Reply via email to