jackwener commented on code in PR #4489: URL: https://github.com/apache/arrow-datafusion/pull/4489#discussion_r1038907183
########## datafusion/core/src/physical_plan/aggregates/hash.rs: ########## @@ -504,7 +510,7 @@ fn create_batch_from_map( }) .collect::<Result<Vec<_>>>()?; - // add state / evaluated arrays + // next, output aggregates: either intermediate state or final output Review Comment: ```suggestion // next, output aggregates: either intermediate state or final output ``` ########## datafusion/expr/src/accumulator.rs: ########## @@ -31,28 +31,48 @@ use std::fmt::Debug; /// * update its state from multiple accumulators' states via `merge_batch` /// * compute the final value from its internal state via `evaluate` pub trait Accumulator: Send + Sync + Debug { - /// Returns the state of the accumulator at the end of the accumulation. - /// in the case of an average on which we track `sum` and `n`, this function should return a vector - /// of two values, sum and n. + /// Returns the partal intermediate state of the accumulator. This Review Comment: ```suggestion /// Returns the partial intermediate state of the accumulator. This ``` ########## datafusion/expr/src/accumulator.rs: ########## @@ -31,28 +31,48 @@ use std::fmt::Debug; /// * update its state from multiple accumulators' states via `merge_batch` /// * compute the final value from its internal state via `evaluate` pub trait Accumulator: Send + Sync + Debug { - /// Returns the state of the accumulator at the end of the accumulation. - /// in the case of an average on which we track `sum` and `n`, this function should return a vector - /// of two values, sum and n. + /// Returns the partal intermediate state of the accumulator. This + /// partial state is serialied as `Arrays` and then combined with + /// other partial states from different instances of this + /// accumulator (that ran on different partitions, for + /// example). + /// + /// The state can be a different type than the output of the + /// [`Accumulator`] + /// + /// See [`merge_batch`] for more details on the merging process. + /// + /// For example, in the case of an average, for which we track `sum` and `n`, + /// this function should return a vector of two values, sum and n. fn state(&self) -> Result<Vec<AggregateState>>; /// Updates the accumulator's state from a vector of arrays. fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()>; - /// Retracts an update (caused by the given inputs) to accumulator's state. - /// Inverse operation of the `update_batch` operation. This method must be - /// for accumulators that should support bounded OVER aggregates. + /// Retracts an update (caused by the given inputs) to + /// accumulator's state. + /// + /// This is the inverse operation of [`update_batch`] and is used + /// to incrementally calculate window aggregates where the OVER + /// clause defines a bounded window. fn retract_batch(&mut self, _values: &[ArrayRef]) -> Result<()> { // TODO add retract for all accumulators Err(DataFusionError::Internal( "Retract should be implemented for aggregate functions when used with custom window frame queries".to_string() )) } - /// updates the accumulator's state from a vector of states. + /// Updates the accumulator's state from an `Array` containing one + /// or more intermediate values. + /// + /// The `states` array passed was formed by cancatenating the Review Comment: ```suggestion /// The `states` array passed was formed by concatenating the ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org