Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/3247#discussion_r27717257 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala --- @@ -17,285 +17,159 @@ package org.apache.spark.sql.catalyst.expressions -import com.clearspring.analytics.stream.cardinality.HyperLogLog - import org.apache.spark.sql.types._ -import org.apache.spark.sql.catalyst.trees -import org.apache.spark.sql.catalyst.errors.TreeNodeException -import org.apache.spark.util.collection.OpenHashSet - -abstract class AggregateExpression extends Expression { - self: Product => - /** - * Creates a new instance that can be used to compute this aggregate expression for a group - * of input rows/ - */ - def newInstance(): AggregateFunction - /** - * [[AggregateExpression.eval]] should never be invoked because [[AggregateExpression]]'s are - * replaced with a physical aggregate operator at runtime. - */ - override def eval(input: Row = null): EvaluatedType = - throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") -} +/** + * This is from org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode + * Just a hint for the UDAF developers which stage we are about to process, + * However, we probably don't want the developers knows so many details, here + * is just for keep consistent with Hive (when integrated with Hive), need to + * figure out if we have work around for that soon. + */ +@deprecated +trait Mode /** - * Represents an aggregation that has been rewritten to be performed in two steps. - * - * @param finalEvaluation an aggregate expression that evaluates to same final result as the - * original aggregation. - * @param partialEvaluations A sequence of [[NamedExpression]]s that can be computed on partial - * data sets and are required to compute the `finalEvaluation`. 
+ * PARTIAL1: from original data to partial aggregation data: iterate() and + * terminatePartial() will be called. */ -case class SplitEvaluation( - finalEvaluation: Expression, - partialEvaluations: Seq[NamedExpression]) +@deprecated +case object PARTIAL1 extends Mode /** - * An [[AggregateExpression]] that can be partially computed without seeing all relevant tuples. - * These partial evaluations can then be combined to compute the actual answer. + * PARTIAL2: from partial aggregation data to partial aggregation data: + * merge() and terminatePartial() will be called. */ -abstract class PartialAggregate extends AggregateExpression { - self: Product => +@deprecated +case object PARTIAL2 extends Mode +/** + * FINAL: from partial aggregation to full aggregation: merge() and + * terminate() will be called. + */ +@deprecated +case object FINAL extends Mode +/** + * COMPLETE: from original data directly to full aggregation: iterate() and + * terminate() will be called. + */ +@deprecated --- End diff -- I'm not exactly sure, but can we have only two states (e.g., PARTIAL and FINAL) to satisfy your intention (`DISTINCT` is processed in `execution.Aggregate`)?
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org