Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1542#discussion_r153068582 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonPreAggregateRules.scala --- @@ -797,33 +798,59 @@ object CarbonPreInsertionCasts extends Rule[LogicalPlan] { * @return */ private def transformAggregatePlan(logicalPlan: LogicalPlan): LogicalPlan = { + val validExpressionsMap = scala.collection.mutable.LinkedHashMap.empty[String, NamedExpression] logicalPlan transform { case aggregate@Aggregate(_, aExp, _) => - val newExpressions = aExp.flatMap { - case alias@Alias(attrExpression: AggregateExpression, _) => - attrExpression.aggregateFunction match { - case Average(attr: AttributeReference) => - Seq(Alias(attrExpression - .copy(aggregateFunction = Sum(attr), - resultId = NamedExpression.newExprId), attr.name + "_sum")(), - Alias(attrExpression - .copy(aggregateFunction = Count(attr), - resultId = NamedExpression.newExprId), attr.name + "_count")()) - case Average(cast@Cast(attr: AttributeReference, _)) => - Seq(Alias(attrExpression - .copy(aggregateFunction = Sum(cast), - resultId = NamedExpression.newExprId), - attr.name + "_sum")(), - Alias(attrExpression - .copy(aggregateFunction = Count(cast), - resultId = NamedExpression.newExprId), attr.name + "_count")()) - case _ => Seq(alias) - } - case namedExpr: NamedExpression => Seq(namedExpr) + aExp.foreach { + case alias: Alias => + validExpressionsMap ++= validateAggregateFunctionAndGetAlias(alias) --- End diff -- Will duplicate columns be removed from the aggregate expressions? For example, if the agg table is created with sum(col1) and avg(col1), then the aggregation table should be created with sum(col1) and count(col1) only; sum(col1) should not be duplicated. Is this handled here?
---