This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7a41907449 Docs: try and clarify what `PartitionEvaluator` functions
are called (#6869)
7a41907449 is described below
commit 7a4190744964ca23191e48b29bafccebfa838cd9
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Jul 7 03:57:46 2023 -0400
Docs: try and clarify what `PartitionEvaluator` functions are called (#6869)
---
datafusion/expr/src/partition_evaluator.rs | 63 +++++++++++++++++-------------
1 file changed, 36 insertions(+), 27 deletions(-)
diff --git a/datafusion/expr/src/partition_evaluator.rs
b/datafusion/expr/src/partition_evaluator.rs
index 169044c803..f0c425ca59 100644
--- a/datafusion/expr/src/partition_evaluator.rs
+++ b/datafusion/expr/src/partition_evaluator.rs
@@ -66,19 +66,19 @@ use crate::window_state::WindowAggState;
/// ```
///
/// Different methods on this trait will be called depending on the
-/// capabilities described by [`Self::supports_bounded_execution`],
-/// [`Self::uses_window_frame`], and [`Self::include_rank`],
+/// capabilities described by [`supports_bounded_execution`],
+/// [`uses_window_frame`], and [`include_rank`],
///
-/// # Stateless `PartitionEvaluator`
+/// # Stateless `PartitionEvaluator`s
///
-/// In this case, either [`Self::evaluate_all`] or
[`Self::evaluate_all_with_rank`] is called with values for the
-/// entire partition.
+/// In this case, `PartitionEvaluator` holds no state, and either
+/// [`evaluate_all`] or [`evaluate_all_with_rank`] is called with
+/// values for the entire partition.
///
-/// # Stateful `PartitionEvaluator`
+/// # Stateful `PartitionEvaluator`s
///
-/// In this case, [`Self::evaluate`] is called to calculate
-/// the results of the window function incrementally for each new
-/// batch.
+/// In this case, [`Self::evaluate`] is called to calculate the window
+/// function incrementally for each new batch.
///
/// For example, when computing `ROW_NUMBER` incrementally,
/// [`Self::evaluate`] will be called multiple times with
@@ -87,16 +87,23 @@ use crate::window_state::WindowAggState;
/// previous batch. The previous row number is saved and restored as
/// the state.
///
-/// When implementing a new `PartitionEvaluator`,
-/// `uses_window_frame` and `supports_bounded_execution` flags determine which
evaluation method will be called
-/// during runtime. Implement corresponding evaluator according to table below.
+/// When implementing a new `PartitionEvaluator`, implement
+/// corresponding evaluator according to table below.
///
-/// |uses_window_frame|supports_bounded_execution|function_to_implement|
-/// |---|---|----|
-/// |false|false|`evaluate_all` (if we were to implement `PERCENT_RANK` it
would end up in this quadrant, we cannot produce any result without seeing
whole data)|
-/// |false|true|`evaluate` (optionally can also implement `evaluate_all` for
more optimized implementation. However, there will be default implementation
that is suboptimal) . If we were to implement `ROW_NUMBER` it will end up in
this quadrant. Example `OddRowNumber` showcases this use case|
-/// |true|false|`evaluate` (I think as long as `uses_window_frame` is `true`.
There is no way for `supports_bounded_execution` to be false). I couldn't come
up with any example for this quadrant |
-/// |true|true|`evaluate`. If we were to implement `FIRST_VALUE`, it would end
up in this quadrant|.
+///
+///
|[`uses_window_frame`]|[`supports_bounded_execution`]|[`include_rank`]|function_to_implement|
+/// |---|---|----|----|
+/// |false (default) |false (default) |false (default) |
[`evaluate_all`] |
+/// |false |true |false |
[`evaluate`] |
+/// |false |true/false |true |
[`evaluate_all_with_rank`] |
+/// |true |true/false |true/false |
[`evaluate`] |
+///
+/// [`evaluate`]: Self::evaluate
+/// [`evaluate_all`]: Self::evaluate_all
+/// [`evaluate_all_with_rank`]: Self::evaluate_all_with_rank
+/// [`uses_window_frame`]: Self::uses_window_frame
+/// [`include_rank`]: Self::include_rank
+/// [`supports_bounded_execution`]: Self::supports_bounded_execution
pub trait PartitionEvaluator: Debug + Send {
/// Updates the internal state for window function
///
@@ -129,9 +136,14 @@ pub trait PartitionEvaluator: Debug + Send {
Ok(())
}
- /// If `uses_window_frame` flag is `false`. This method is used to
calculate required range for the window function
- /// Generally there is no required range, hence by default this returns
smallest range(current row). e.g seeing current row
- /// is enough to calculate window result (such as row_number, rank, etc)
+ /// If `uses_window_frame` flag is `false`. This method is used to
+ /// calculate required range for the window function during
+ /// stateful execution.
+ ///
+ /// Generally there is no required range, hence by default this
+ /// returns smallest range(current row). e.g seeing current row is
+ /// enough to calculate window result (such as row_number, rank,
+ /// etc)
fn get_range(&self, idx: usize, _n_rows: usize) -> Result<Range<usize>> {
if self.uses_window_frame() {
Err(DataFusionError::Execution(
@@ -259,8 +271,7 @@ pub trait PartitionEvaluator: Debug + Send {
/// Can the window function be incrementally computed using
/// bounded memory?
///
- /// If this function returns true, implement
[`PartitionEvaluator::evaluate`] and
- /// [`PartitionEvaluator::update_state`].
+ /// See the table on [`Self`] for what functions to implement
fn supports_bounded_execution(&self) -> bool {
false
}
@@ -268,16 +279,14 @@ pub trait PartitionEvaluator: Debug + Send {
/// Does the window function use the values from the window frame,
/// if one is specified?
///
- /// If this function returns true, implement
[`PartitionEvaluator::evaluate_all`].
- ///
- /// See details and examples on [`PartitionEvaluator::evaluate_all`].
+ /// See the table on [`Self`] for what functions to implement
fn uses_window_frame(&self) -> bool {
false
}
/// Can this function be evaluated with (only) rank
///
- /// If `include_rank` is true, implement
[`PartitionEvaluator::evaluate_all_with_rank`]
+ /// See the table on [`Self`] for what functions to implement
fn include_rank(&self) -> bool {
false
}