This is an automated email from the ASF dual-hosted git repository.
mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ba50a8b178 Minor: provide default implementation for
ExecutionPlan::statistics (#7911)
ba50a8b178 is described below
commit ba50a8b178eece7e79b100d0b73bdc9d6d3ec6d5
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Oct 24 07:02:49 2023 -0400
Minor: provide default implementation for ExecutionPlan::statistics (#7911)
* Minor: provide default implementation for ExecutionPlan::statistics
* fix: update statistics
---
datafusion-examples/examples/custom_datasource.rs | 6 +-----
datafusion/core/src/physical_planner.rs | 8 +-------
datafusion/core/src/test_util/mod.rs | 6 +-----
datafusion/physical-plan/src/analyze.rs | 7 +------
datafusion/physical-plan/src/explain.rs | 7 +------
datafusion/physical-plan/src/insert.rs | 5 -----
datafusion/physical-plan/src/lib.rs | 8 ++++++--
datafusion/physical-plan/src/streaming.rs | 6 +-----
datafusion/physical-plan/src/test/exec.rs | 12 ------------
datafusion/physical-plan/src/unnest.rs | 6 +-----
10 files changed, 13 insertions(+), 58 deletions(-)
diff --git a/datafusion-examples/examples/custom_datasource.rs
b/datafusion-examples/examples/custom_datasource.rs
index dd36665a93..9f25a0b2fa 100644
--- a/datafusion-examples/examples/custom_datasource.rs
+++ b/datafusion-examples/examples/custom_datasource.rs
@@ -32,7 +32,7 @@ use datafusion::physical_plan::expressions::PhysicalSortExpr;
use datafusion::physical_plan::memory::MemoryStream;
use datafusion::physical_plan::{
project_schema, DisplayAs, DisplayFormatType, ExecutionPlan,
- SendableRecordBatchStream, Statistics,
+ SendableRecordBatchStream,
};
use datafusion::prelude::*;
use datafusion_expr::{Expr, LogicalPlanBuilder};
@@ -270,8 +270,4 @@ impl ExecutionPlan for CustomExec {
None,
)?))
}
-
- fn statistics(&self) -> Result<Statistics> {
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
diff --git a/datafusion/core/src/physical_planner.rs
b/datafusion/core/src/physical_planner.rs
index 5a1fdcaee5..419f62cff6 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -2057,9 +2057,7 @@ mod tests {
use super::*;
use crate::datasource::file_format::options::CsvReadOptions;
use crate::datasource::MemTable;
- use crate::physical_plan::{
- expressions, DisplayFormatType, Partitioning, Statistics,
- };
+ use crate::physical_plan::{expressions, DisplayFormatType, Partitioning};
use crate::physical_plan::{DisplayAs, SendableRecordBatchStream};
use crate::physical_planner::PhysicalPlanner;
use crate::prelude::{SessionConfig, SessionContext};
@@ -2670,10 +2668,6 @@ mod tests {
) -> Result<SendableRecordBatchStream> {
unimplemented!("NoOpExecutionPlan::execute");
}
-
- fn statistics(&self) -> Result<Statistics> {
- unimplemented!("NoOpExecutionPlan::statistics");
- }
}
// Produces an execution plan where the schema is mismatched from
diff --git a/datafusion/core/src/test_util/mod.rs
b/datafusion/core/src/test_util/mod.rs
index d826ec8bfb..4fe022f176 100644
--- a/datafusion/core/src/test_util/mod.rs
+++ b/datafusion/core/src/test_util/mod.rs
@@ -40,7 +40,7 @@ use crate::prelude::{CsvReadOptions, SessionContext};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
-use datafusion_common::{Statistics, TableReference};
+use datafusion_common::TableReference;
use datafusion_expr::{CreateExternalTable, Expr, TableType};
use datafusion_physical_expr::PhysicalSortExpr;
@@ -238,10 +238,6 @@ impl ExecutionPlan for UnboundedExec {
batch: self.batch.clone(),
}))
}
-
- fn statistics(&self) -> Result<Statistics> {
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
#[derive(Debug)]
diff --git a/datafusion/physical-plan/src/analyze.rs
b/datafusion/physical-plan/src/analyze.rs
index bce2425135..ded37983bb 100644
--- a/datafusion/physical-plan/src/analyze.rs
+++ b/datafusion/physical-plan/src/analyze.rs
@@ -25,7 +25,7 @@ use super::stream::{RecordBatchReceiverStream,
RecordBatchStreamAdapter};
use super::{DisplayAs, Distribution, SendableRecordBatchStream};
use crate::display::DisplayableExecutionPlan;
-use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics};
+use crate::{DisplayFormatType, ExecutionPlan, Partitioning};
use arrow::{array::StringBuilder, datatypes::SchemaRef,
record_batch::RecordBatch};
use datafusion_common::{internal_err, DataFusionError, Result};
@@ -195,11 +195,6 @@ impl ExecutionPlan for AnalyzeExec {
futures::stream::once(output),
)))
}
-
- fn statistics(&self) -> Result<Statistics> {
- // Statistics an an ANALYZE plan are not relevant
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
/// Creates the ouput of AnalyzeExec as a RecordBatch
diff --git a/datafusion/physical-plan/src/explain.rs
b/datafusion/physical-plan/src/explain.rs
index 81b8f99441..e4904ddd34 100644
--- a/datafusion/physical-plan/src/explain.rs
+++ b/datafusion/physical-plan/src/explain.rs
@@ -23,7 +23,7 @@ use std::sync::Arc;
use super::expressions::PhysicalSortExpr;
use super::{DisplayAs, SendableRecordBatchStream};
use crate::stream::RecordBatchStreamAdapter;
-use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics};
+use crate::{DisplayFormatType, ExecutionPlan, Partitioning};
use arrow::{array::StringBuilder, datatypes::SchemaRef,
record_batch::RecordBatch};
use datafusion_common::display::StringifiedPlan;
@@ -167,11 +167,6 @@ impl ExecutionPlan for ExplainExec {
futures::stream::iter(vec![Ok(record_batch)]),
)))
}
-
- fn statistics(&self) -> Result<Statistics> {
- // Statistics an EXPLAIN plan are not relevant
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
/// If this plan should be shown, given the previous plan that was
diff --git a/datafusion/physical-plan/src/insert.rs
b/datafusion/physical-plan/src/insert.rs
index d1f2706930..627d58e137 100644
--- a/datafusion/physical-plan/src/insert.rs
+++ b/datafusion/physical-plan/src/insert.rs
@@ -25,7 +25,6 @@ use std::sync::Arc;
use super::expressions::PhysicalSortExpr;
use super::{
DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning,
SendableRecordBatchStream,
- Statistics,
};
use crate::metrics::MetricsSet;
use crate::stream::RecordBatchStreamAdapter;
@@ -276,10 +275,6 @@ impl ExecutionPlan for FileSinkExec {
stream,
)))
}
-
- fn statistics(&self) -> Result<Statistics> {
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
/// Create a output record batch with a count
diff --git a/datafusion/physical-plan/src/lib.rs
b/datafusion/physical-plan/src/lib.rs
index d7987ba95a..b2f81579f8 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -231,8 +231,12 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
None
}
- /// Returns the global output statistics for this `ExecutionPlan` node.
- fn statistics(&self) -> Result<Statistics>;
+ /// Returns statistics for this `ExecutionPlan` node. If statistics are not
+ /// available, should return [`Statistics::new_unknown`] (the default), not
+ /// an error.
+ fn statistics(&self) -> Result<Statistics> {
+ Ok(Statistics::new_unknown(&self.schema()))
+ }
}
/// Indicate whether a data exchange is needed for the input of `plan`, which
will be very helpful
diff --git a/datafusion/physical-plan/src/streaming.rs
b/datafusion/physical-plan/src/streaming.rs
index 7bfa7e2cee..27f03b727c 100644
--- a/datafusion/physical-plan/src/streaming.rs
+++ b/datafusion/physical-plan/src/streaming.rs
@@ -26,7 +26,7 @@ use crate::stream::RecordBatchStreamAdapter;
use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
use arrow::datatypes::SchemaRef;
-use datafusion_common::{internal_err, plan_err, DataFusionError, Result,
Statistics};
+use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
use datafusion_execution::TaskContext;
use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
@@ -187,8 +187,4 @@ impl ExecutionPlan for StreamingTableExec {
None => stream,
})
}
-
- fn statistics(&self) -> Result<Statistics> {
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
diff --git a/datafusion/physical-plan/src/test/exec.rs
b/datafusion/physical-plan/src/test/exec.rs
index f90f4231c6..71e6cba674 100644
--- a/datafusion/physical-plan/src/test/exec.rs
+++ b/datafusion/physical-plan/src/test/exec.rs
@@ -453,10 +453,6 @@ impl ExecutionPlan for ErrorExec {
) -> Result<SendableRecordBatchStream> {
internal_err!("ErrorExec, unsurprisingly, errored in partition
{partition}")
}
-
- fn statistics(&self) -> Result<Statistics> {
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
/// A mock execution plan that simply returns the provided statistics
@@ -627,10 +623,6 @@ impl ExecutionPlan for BlockingExec {
_refs: Arc::clone(&self.refs),
}))
}
-
- fn statistics(&self) -> Result<Statistics> {
- unimplemented!()
- }
}
/// A [`RecordBatchStream`] that is pending forever.
@@ -764,10 +756,6 @@ impl ExecutionPlan for PanicExec {
ready: false,
}))
}
-
- fn statistics(&self) -> Result<Statistics> {
- unimplemented!()
- }
}
/// A [`RecordBatchStream`] that yields every other batch and panics
diff --git a/datafusion/physical-plan/src/unnest.rs
b/datafusion/physical-plan/src/unnest.rs
index ed64735e5a..30f109953c 100644
--- a/datafusion/physical-plan/src/unnest.rs
+++ b/datafusion/physical-plan/src/unnest.rs
@@ -25,7 +25,7 @@ use super::DisplayAs;
use crate::{
expressions::Column, DisplayFormatType, Distribution,
EquivalenceProperties,
ExecutionPlan, Partitioning, PhysicalExpr, PhysicalSortExpr,
RecordBatchStream,
- SendableRecordBatchStream, Statistics,
+ SendableRecordBatchStream,
};
use arrow::array::{
@@ -159,10 +159,6 @@ impl ExecutionPlan for UnnestExec {
unnest_time: 0,
}))
}
-
- fn statistics(&self) -> Result<Statistics> {
- Ok(Statistics::new_unknown(&self.schema()))
- }
}
/// A stream that issues [RecordBatch]es with unnested column data.