mustafasrepo commented on code in PR #11875:
URL: https://github.com/apache/datafusion/pull/11875#discussion_r1711633095


##########
datafusion/physical-plan/src/limit.rs:
##########
@@ -380,56 +321,110 @@ impl ExecutionPlan for LocalLimitExec {
     }
 
     fn statistics(&self) -> Result<Statistics> {
-        let input_stats = self.input.statistics()?;
-        let col_stats = Statistics::unknown_column(&self.schema());
-        let stats = match input_stats {
-            // if the input does not reach the limit globally, return input 
stats
-            Statistics {
-                num_rows: Precision::Exact(nr),
-                ..
-            }
-            | Statistics {
-                num_rows: Precision::Inexact(nr),
-                ..
-            } if nr <= self.fetch => input_stats,
-            // if the input is greater than the limit, the num_row will be 
greater
-            // than the limit because the partitions will be limited separately
-            // the statistic
-            Statistics {
-                num_rows: Precision::Exact(nr),
-                ..
-            } if nr > self.fetch => Statistics {
-                num_rows: Precision::Exact(self.fetch),
-                // this is not actually exact, but will be when GlobalLimit is 
applied
-                // TODO stats: find a more explicit way to vehiculate this 
information
-                column_statistics: col_stats,
-                total_byte_size: Precision::Absent,
-            },
-            Statistics {
-                num_rows: Precision::Inexact(nr),
-                ..
-            } if nr > self.fetch => Statistics {
-                num_rows: Precision::Inexact(self.fetch),
-                // this is not actually exact, but will be when GlobalLimit is 
applied
-                // TODO stats: find a more explicit way to vehiculate this 
information
-                column_statistics: col_stats,
-                total_byte_size: Precision::Absent,
-            },
-            _ => Statistics {
-                // the result output row number will always be no greater than 
the limit number
-                num_rows: Precision::Inexact(
-                    self.fetch
-                        * 
self.properties().output_partitioning().partition_count(),
-                ),
-
-                column_statistics: col_stats,
-                total_byte_size: Precision::Absent,
-            },
-        };
-        Ok(stats)
+        statistics_with_fetch(
+            self.input.statistics()?,
+            self.schema(),
+            Some(self.fetch),
+            0,
+            self.properties().partitioning.partition_count(),
+        )
+    }
+
+    fn fetch(&self) -> Option<usize> {
+        Some(self.fetch)
+    }
+
+    fn supports_limit_pushdown(&self) -> bool {
+        true
     }
 }
 
+/// Calculates the statistics for the operator when fetch and skip is used in 
the operator
+/// (Output row count can be estimated in the presence of fetch and skip 
information).
+/// using the input statistics information.
+pub fn statistics_with_fetch(

Review Comment:
   This will be much better, and will make it visible.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to