mustafasrepo commented on code in PR #11875:
URL: https://github.com/apache/datafusion/pull/11875#discussion_r1711633095
##########
datafusion/physical-plan/src/limit.rs:
##########
@@ -380,56 +321,110 @@ impl ExecutionPlan for LocalLimitExec {
}
fn statistics(&self) -> Result<Statistics> {
- let input_stats = self.input.statistics()?;
- let col_stats = Statistics::unknown_column(&self.schema());
- let stats = match input_stats {
- // if the input does not reach the limit globally, return input stats
- Statistics {
- num_rows: Precision::Exact(nr),
- ..
- }
- | Statistics {
- num_rows: Precision::Inexact(nr),
- ..
- } if nr <= self.fetch => input_stats,
- // if the input is greater than the limit, the num_row will be greater
- // than the limit because the partitions will be limited separately
- // the statistic
- Statistics {
- num_rows: Precision::Exact(nr),
- ..
- } if nr > self.fetch => Statistics {
- num_rows: Precision::Exact(self.fetch),
- // this is not actually exact, but will be when GlobalLimit is applied
- // TODO stats: find a more explicit way to vehiculate this information
- column_statistics: col_stats,
- total_byte_size: Precision::Absent,
- },
- Statistics {
- num_rows: Precision::Inexact(nr),
- ..
- } if nr > self.fetch => Statistics {
- num_rows: Precision::Inexact(self.fetch),
- // this is not actually exact, but will be when GlobalLimit is applied
- // TODO stats: find a more explicit way to vehiculate this information
- column_statistics: col_stats,
- total_byte_size: Precision::Absent,
- },
- _ => Statistics {
- // the result output row number will always be no greater than the limit number
- num_rows: Precision::Inexact(
- self.fetch
- * self.properties().output_partitioning().partition_count(),
- ),
-
- column_statistics: col_stats,
- total_byte_size: Precision::Absent,
- },
- };
- Ok(stats)
+ statistics_with_fetch(
+ self.input.statistics()?,
+ self.schema(),
+ Some(self.fetch),
+ 0,
+ self.properties().partitioning.partition_count(),
+ )
+ }
+
+ fn fetch(&self) -> Option<usize> {
+ Some(self.fetch)
+ }
+
+ fn supports_limit_pushdown(&self) -> bool {
+ true
}
}
+/// Calculates the statistics for the operator when fetch and skip is used in the operator
+/// (Output row count can be estimated in the presence of fetch and skip information).
+/// using the input statistics information.
+pub fn statistics_with_fetch(
Review Comment:
This will be much better, and will make it visible.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]