This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 681559ea46e [SPARK-42481][CONNECT] Implement agg.{max,min,mean,count,avg,sum} 681559ea46e is described below commit 681559ea46ee160f5ece8d2743aab186a7a45aa3 Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Fri Feb 17 20:49:40 2023 -0400 [SPARK-42481][CONNECT] Implement agg.{max,min,mean,count,avg,sum} ### What changes were proposed in this pull request? Adding more API to `agg` including max,min,mean,count,avg,sum. ### Why are the changes needed? API coverage ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT Closes #40070 from amaliujia/rw-agg2. Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Herman van Hovell <her...@databricks.com> (cherry picked from commit 74f53b8d008b8fd570439d5cc56a0c0753ff4910) Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../main/scala/org/apache/spark/sql/Dataset.scala | 9 ++ .../spark/sql/RelationalGroupedDataset.scala | 157 +++++++++++++++------ .../apache/spark/sql/PlanGenerationTestSuite.scala | 42 ++++++ .../explain-results/groupby_agg_columns.explain | 2 + .../explain-results/groupby_avg.explain | 2 + .../explain-results/groupby_count.explain | 2 + .../explain-results/groupby_max.explain | 2 + .../explain-results/groupby_mean.explain | 2 + .../explain-results/groupby_min.explain | 2 + .../explain-results/groupby_sum.explain | 2 + .../resources/query-tests/queries/groupby_agg.json | 3 +- .../query-tests/queries/groupby_agg.proto.bin | Bin 188 -> 186 bytes .../query-tests/queries/groupby_agg_columns.json | 34 +++++ .../queries/groupby_agg_columns.proto.bin | 7 + .../resources/query-tests/queries/groupby_avg.json | 34 +++++ .../query-tests/queries/groupby_avg.proto.bin | 7 + .../query-tests/queries/groupby_count.json | 30 ++++ .../query-tests/queries/groupby_count.proto.bin | 6 + .../resources/query-tests/queries/groupby_max.json | 34 +++++ .../query-tests/queries/groupby_max.proto.bin | 7 + .../query-tests/queries/groupby_mean.json | 34 +++++ .../query-tests/queries/groupby_mean.proto.bin | 7 + .../resources/query-tests/queries/groupby_min.json | 34 +++++ .../query-tests/queries/groupby_min.proto.bin | 7 + .../resources/query-tests/queries/groupby_sum.json | 34 +++++ .../query-tests/queries/groupby_sum.proto.bin | 7 + 26 files changed, 464 insertions(+), 43 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala index c39fc6100f5..3c34b45fccb 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2065,6 +2065,15 @@ class Dataset[T] private[sql] (val session: SparkSession, private[sql] val plan: collectResult().iterator.asInstanceOf[java.util.Iterator[T]] } + /** + * Returns the number of rows in the Dataset. + * @group action + * @since 3.4.0 + */ + def count(): Long = { + groupBy().count().collect().head.getLong(0) + } + private def buildRepartition(numPartitions: Int, shuffle: Boolean): Dataset[T] = { session.newDataset { builder => builder.getRepartitionBuilder diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index a3dfcb01fdc..a6d3dc2e468 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -39,14 +39,14 @@ class RelationalGroupedDataset protected[sql] ( private[sql] val df: DataFrame, private[sql] val groupingExprs: Seq[proto.Expression]) { - private[this] def toDF(aggExprs: Seq[proto.Expression]): DataFrame = { + private[this] def toDF(aggExprs: Seq[Column]): DataFrame = { // TODO: support other GroupByType such as Rollup, Cube, Pivot. df.session.newDataset { builder => builder.getAggregateBuilder .setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_GROUPBY) .setInput(df.plan.getRoot) .addAllGroupingExpressions(groupingExprs.asJava) - .addAllAggregateExpressions(aggExprs.asJava) + .addAllAggregateExpressions(aggExprs.map(e => e.expr).asJava) } } @@ -67,7 +67,7 @@ class RelationalGroupedDataset protected[sql] ( */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { toDF((aggExpr +: aggExprs).map { case (colName, expr) => - strToExpr(expr, df(colName).expr) + strToColumn(expr, df(colName)) }) } @@ -88,7 +88,7 @@ class RelationalGroupedDataset protected[sql] ( */ def agg(exprs: Map[String, String]): DataFrame = { toDF(exprs.map { case (colName, expr) => - strToExpr(expr, df(colName).expr) + strToColumn(expr, df(colName)) }.toSeq) } @@ -109,44 +109,119 @@ class RelationalGroupedDataset protected[sql] ( agg(exprs.asScala.toMap) } - private[this] def strToExpr(expr: String, inputExpr: proto.Expression): proto.Expression = { - val builder = proto.Expression.newBuilder() - + private[this] def strToColumn(expr: String, inputExpr: Column): Column = { expr.toLowerCase(Locale.ROOT) match { - // We special handle a few cases that have alias that are not in function registry. - case "avg" | "average" | "mean" => - builder.getUnresolvedFunctionBuilder - .setFunctionName("avg") - .addArguments(inputExpr) - .setIsDistinct(false) - case "stddev" | "std" => - builder.getUnresolvedFunctionBuilder - .setFunctionName("stddev") - .addArguments(inputExpr) - .setIsDistinct(false) - // Also special handle count because we need to take care count(*). - case "count" | "size" => - // Turn count(*) into count(1) - inputExpr match { - case s if s.hasUnresolvedStar => - val exprBuilder = proto.Expression.newBuilder - exprBuilder.getLiteralBuilder.setInteger(1) - builder.getUnresolvedFunctionBuilder - .setFunctionName("count") - .addArguments(exprBuilder) - .setIsDistinct(false) - case _ => - builder.getUnresolvedFunctionBuilder - .setFunctionName("count") - .addArguments(inputExpr) - .setIsDistinct(false) - } - case name => - builder.getUnresolvedFunctionBuilder - .setFunctionName(name) - .addArguments(inputExpr) - .setIsDistinct(false) + case "avg" | "average" | "mean" => functions.avg(inputExpr) + case "stddev" | "std" => functions.stddev(inputExpr) + case "count" | "size" => functions.count(inputExpr) + case name => Column.fn(name, inputExpr) } - builder.build() + } + + /** + * Compute aggregates by specifying a series of aggregate columns. Note that this function by + * default retains the grouping columns in its output. To not retain grouping columns, set + * `spark.sql.retainGroupColumns` to false. + * + * The available aggregate methods are defined in [[org.apache.spark.sql.functions]]. + * + * {{{ + * // Selects the age of the oldest employee and the aggregate expense for each department + * + * // Scala: + * import org.apache.spark.sql.functions._ + * df.groupBy("department").agg(max("age"), sum("expense")) + * + * // Java: + * import static org.apache.spark.sql.functions.*; + * df.groupBy("department").agg(max("age"), sum("expense")); + * }}} + * + * Note that before Spark 1.4, the default behavior is to NOT retain grouping columns. To change + * to that behavior, set config variable `spark.sql.retainGroupColumns` to `false`. + * {{{ + * // Scala, 1.3.x: + * df.groupBy("department").agg($"department", max("age"), sum("expense")) + * + * // Java, 1.3.x: + * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); + * }}} + * + * @since 3.4.0 + */ + @scala.annotation.varargs + def agg(expr: Column, exprs: Column*): DataFrame = { + toDF((expr +: exprs).map { case c => + c + // TODO: deal with typed columns. + }) + } + + /** + * Count the number of rows for each group. The resulting `DataFrame` will also contain the + * grouping columns. + * + * @since 3.4.0 + */ + def count(): DataFrame = toDF(Seq(functions.count(functions.lit(1)).alias("count"))) + + /** + * Compute the average value for each numeric columns for each group. This is an alias for + * `avg`. The resulting `DataFrame` will also contain the grouping columns. When specified + * columns are given, only compute the average values for them. + * + * @since 3.4.0 + */ + @scala.annotation.varargs + def mean(colNames: String*): DataFrame = { + toDF(colNames.map(colName => functions.mean(colName))) + } + + /** + * Compute the max value for each numeric columns for each group. The resulting `DataFrame` will + * also contain the grouping columns. When specified columns are given, only compute the max + * values for them. + * + * @since 3.4.0 + */ + @scala.annotation.varargs + def max(colNames: String*): DataFrame = { + toDF(colNames.map(colName => functions.max(colName))) + } + + /** + * Compute the mean value for each numeric columns for each group. The resulting `DataFrame` + * will also contain the grouping columns. When specified columns are given, only compute the + * mean values for them. + * + * @since 3.4.0 + */ + @scala.annotation.varargs + def avg(colNames: String*): DataFrame = { + toDF(colNames.map(colName => functions.avg(colName))) + } + + /** + * Compute the min value for each numeric column for each group. The resulting `DataFrame` will + * also contain the grouping columns. When specified columns are given, only compute the min + * values for them. + * + * @since 3.4.0 + */ + @scala.annotation.varargs + def min(colNames: String*): DataFrame = { + toDF(colNames.map(colName => functions.min(colName))) + } + + /** + * Compute the sum for each numeric columns for each group. The resulting `DataFrame` will also + * contain the grouping columns. When specified columns are given, only compute the sum for + * them. + * + * @since 3.4.0 + */ + @scala.annotation.varargs + def sum(colNames: String*): DataFrame = { + toDF(colNames.map(colName => functions.sum(colName))) } } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index b591daef391..c0b3bef4fbc 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -1201,6 +1201,48 @@ class PlanGenerationTestSuite extends ConnectFunSuite with BeforeAndAfterAll wit "a" -> "count") } + test("groupby agg columns") { + simple + .groupBy(Column("id")) + .agg(functions.max("a"), functions.sum("b")) + } + + test("groupby max") { + simple + .groupBy(Column("id")) + .max("a", "b") + } + + test("groupby min") { + simple + .groupBy(Column("id")) + .min("a", "b") + } + + test("groupby mean") { + simple + .groupBy(Column("id")) + .mean("a", "b") + } + + test("groupby avg") { + simple + .groupBy(Column("id")) + .avg("a", "b") + } + + test("groupby sum") { + simple + .groupBy(Column("id")) + .sum("a", "b") + } + + test("groupby count") { + simple + .groupBy(Column("id")) + .count() + } + test("function lit") { select( fn.lit(fn.col("id")), diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_agg_columns.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_agg_columns.explain new file mode 100644 index 00000000000..86b919a3919 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_agg_columns.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, max(a#0) AS max(a)#0, sum(b#0) AS sum(b)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_avg.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_avg.explain new file mode 100644 index 00000000000..e7c559a1bf6 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_avg.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, avg(a#0) AS avg(a)#0, avg(b#0) AS avg(b)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_count.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_count.explain new file mode 100644 index 00000000000..dd08ec3bd59 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_count.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, count(1) AS count#0L] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_max.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_max.explain new file mode 100644 index 00000000000..8f00ba848ca --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_max.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, max(a#0) AS max(a)#0, max(b#0) AS max(b)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_mean.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_mean.explain new file mode 100644 index 00000000000..e7c559a1bf6 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_mean.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, avg(a#0) AS avg(a)#0, avg(b#0) AS avg(b)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_min.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_min.explain new file mode 100644 index 00000000000..b46adbdc263 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_min.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, min(a#0) AS min(a)#0, min(b#0) AS min(b)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_sum.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_sum.explain new file mode 100644 index 00000000000..5d6b075bbe6 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/groupby_sum.explain @@ -0,0 +1,2 @@ +Aggregate [id#0L], [id#0L, sum(a#0) AS sum(a)#0L, sum(b#0) AS sum(b)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.json index 7838a89974d..4cf2ae0c8c2 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.json @@ -69,8 +69,7 @@ "unresolvedFunction": { "functionName": "count", "arguments": [{ - "literal": { - "integer": 1 + "unresolvedStar": { } }] } diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin index 9c6d1cca8a4..eed57649c45 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json new file mode 100644 index 00000000000..fd2264fd2ae --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json @@ -0,0 +1,34 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "max", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "sum", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin new file mode 100644 index 00000000000..b12dd5229db --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin @@ -0,0 +1,7 @@ +JP +$Z" struct<id:bigint,a:int,b:double> +id" +max +a" +sum +b \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_avg.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_avg.json new file mode 100644 index 00000000000..df4216bdd51 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_avg.json @@ -0,0 +1,34 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "avg", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "avg", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin new file mode 100644 index 00000000000..33cbb49f1fe --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin @@ -0,0 +1,7 @@ +JP +$Z" struct<id:bigint,a:int,b:double> +id" +avg +a" +avg +b \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_count.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_count.json new file mode 100644 index 00000000000..c28c167f21b --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_count.json @@ -0,0 +1,30 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "alias": { + "expr": { + "unresolvedFunction": { + "functionName": "count", + "arguments": [{ + "literal": { + "integer": 1 + } + }] + } + }, + "name": ["count"] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin new file mode 100644 index 00000000000..d3920650eb5 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin @@ -0,0 +1,6 @@ +JL +$Z" struct<id:bigint,a:int,b:double> +id"2 + +count +0count \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_max.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_max.json new file mode 100644 index 00000000000..262232063dd --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_max.json @@ -0,0 +1,34 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "max", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "max", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin new file mode 100644 index 00000000000..e43c9e3e325 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin @@ -0,0 +1,7 @@ +JP +$Z" struct<id:bigint,a:int,b:double> +id" +max +a" +max +b \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_mean.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_mean.json new file mode 100644 index 00000000000..df4216bdd51 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_mean.json @@ -0,0 +1,34 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "avg", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "avg", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin new file mode 100644 index 00000000000..33cbb49f1fe --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin @@ -0,0 +1,7 @@ +JP +$Z" struct<id:bigint,a:int,b:double> +id" +avg +a" +avg +b \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_min.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_min.json new file mode 100644 index 00000000000..c6c55dde8b4 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_min.json @@ -0,0 +1,34 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "min", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "min", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin new file mode 100644 index 00000000000..c7ad1785cd8 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin @@ -0,0 +1,7 @@ +JP +$Z" struct<id:bigint,a:int,b:double> +id" +min +a" +min +b \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_sum.json b/connector/connect/common/src/test/resources/query-tests/queries/groupby_sum.json new file mode 100644 index 00000000000..48d3820dea9 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_sum.json @@ -0,0 +1,34 @@ +{ + "aggregate": { + "input": { + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double\u003e" + } + }, + "groupType": "GROUP_TYPE_GROUPBY", + "groupingExpressions": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "id" + } + }], + "aggregateExpressions": [{ + "unresolvedFunction": { + "functionName": "sum", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }, { + "unresolvedFunction": { + "functionName": "sum", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin new file mode 100644 index 00000000000..673b7452703 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin @@ -0,0 +1,7 @@ +JP +$Z" struct<id:bigint,a:int,b:double> +id" +sum +a" +sum +b \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org