This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9e6f2dd7268 [SPARK-39320][SQL] Support aggregate function `MEDIAN` 9e6f2dd7268 is described below commit 9e6f2dd72686a9ac44fd4573b5a408f8a8e55fe1 Author: Jiaan Geng <belie...@163.com> AuthorDate: Fri Jun 3 08:23:22 2022 +0300 [SPARK-39320][SQL] Support aggregate function `MEDIAN` ### What changes were proposed in this pull request? Many mainstream database supports aggregate function `MEDIAN`. **Syntax:** Aggregate function `MEDIAN( <expr> )` Window function `MEDIAN( <expr> ) OVER ( [ PARTITION BY <expr2> ] )` **Arguments:** expr: The expression must evaluate to a numeric data type (INTEGER, FLOAT, DECIMAL, or equivalent). **Examples**: ``` select k, median(v) from aggr group by k order by k; +---+-----------+ | K | MEDIAN(V) | |---+-----------| | 1 | 20.00000 | | 2 | 22.50000 | | 3 | NULL | +---+-----------+ ``` ### Why are the changes needed? The mainstream database supports `MEDIAN` show below: **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/median.html **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/MEDIAN.html#GUID-DE15705A-AC18-4416-8487-B9E1D70CE01A **ClickHouse** https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/median **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_MEDIAN.html **Teradata** https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/Ordered-Analytical/Window-Aggregate-Functions/MEDIAN **DB2** https://www.ibm.com/docs/en/db2/11.5?topic=functions-median **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Analytic/MEDIANAnalytic.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAnalytic%20Functions%7C_____20 **H2** http://www.h2database.com/html/functions-aggregate.html#median **Sybase** https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.infocenter.dc01776.1601/doc/html/san1278453109663.html **Exasol** https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/median.htm **Yellowbrick** https://www.yellowbrick.com/docs/5.2/ybd_sqlref/median.html **Mariadb** https://mariadb.com/kb/en/median/ **Singlestore** https://docs.singlestore.com/db/v7.6/en/reference/sql-reference/aggregate-functions/median.html **InfluxDB** https://docs.influxdata.com/flux/v0.x/stdlib/universe/median/ ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests. Closes #36714 from beliefer/SPARK-39320. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/percentiles.scala | 26 +- .../sql-functions/sql-expression-schema.md | 1 + .../test/resources/sql-tests/inputs/group-by.sql | 29 -- .../resources/sql-tests/inputs/percentiles.sql | 212 +++++++++++ .../src/test/resources/sql-tests/inputs/window.sql | 112 ------ .../resources/sql-tests/results/group-by.sql.out | 70 +--- .../sql-tests/results/percentiles.sql.out | 417 +++++++++++++++++++++ .../resources/sql-tests/results/window.sql.out | 229 +---------- 10 files changed, 661 insertions(+), 441 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ed2e9ba2b6b..7635918279a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, PercentileCont, PercentileDisc} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Median, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -243,7 +243,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // Only allow window functions with an aggregate expression or an offset window // function or a Pandas window UDF. w.windowFunction match { - case agg @ AggregateExpression(_: PercentileCont | _: PercentileDisc, _, _, _, _) + case agg @ AggregateExpression( + _: PercentileCont | _: PercentileDisc | _: Median, _, _, _, _) if w.windowSpec.orderSpec.nonEmpty || w.windowSpec.frameSpecification != SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing) => failAnalysis( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 2155fb2efeb..09d3cd30cab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -477,6 +477,7 @@ object FunctionRegistry { expression[Min]("min"), expression[MinBy]("min_by"), expression[Percentile]("percentile"), + expression[Median]("median"), expression[Skewness]("skewness"), expression[ApproximatePercentile]("percentile_approx"), expression[ApproximatePercentile]("approx_percentile", true), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala index d55f0b7a831..3acb2f2cc97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike} +import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike, UnaryLike} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ @@ -359,6 +359,30 @@ case class Percentile( ) } +@ExpressionDescription( + usage = "_FUNC_(col) - Returns the median of numeric or ansi interval column `col`.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (0), (10) AS tab(col); + 5.0 + > SELECT _FUNC_(col) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col); + 5.0 + """, + group = "agg_funcs", + since = "3.4.0") +case class Median(child: Expression) + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with UnaryLike[Expression] { + private lazy val percentile = new Percentile(child, Literal(0.5, DoubleType)) + override def replacement: Expression = percentile + override def nodeName: String = "median" + override def inputTypes: Seq[AbstractDataType] = percentile.inputTypes.take(1) + override protected def withNewChildInternal( + newChild: Expression): Median = this.copy(child = newChild) +} + /** * Return a percentile value based on a continuous distribution of * numeric or ansi interval column at the given percentage (specified in ORDER BY clause). diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 2d4eef39405..3bfc342939f 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -368,6 +368,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col):int> | | org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> | | org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> | +| org.apache.spark.sql.catalyst.expressions.aggregate.Median | median | SELECT median(col) FROM VALUES (0), (10) AS tab(col) | struct<median(col):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> | | org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> | | org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, 0.3, 1):double> | diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 5fba8616641..187a843e509 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -7,9 +7,6 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) AS testData(a, b); -CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES -(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null) -AS aggr(k, v); -- Aggregate with empty GroupBy expressions. SELECT a, COUNT(b) FROM testData; @@ -247,29 +244,3 @@ SELECT FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a; - --- SPARK-37676: Support ANSI Aggregation Function: percentile_cont -SELECT - percentile_cont(0.25) WITHIN GROUP (ORDER BY v), - percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr; -SELECT - k, - percentile_cont(0.25) WITHIN GROUP (ORDER BY v), - percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr -GROUP BY k -ORDER BY k; - --- SPARK-37691: Support ANSI Aggregation Function: percentile_disc -SELECT - percentile_disc(0.25) WITHIN GROUP (ORDER BY v), - percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr; -SELECT - k, - percentile_disc(0.25) WITHIN GROUP (ORDER BY v), - percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr -GROUP BY k -ORDER BY k; diff --git a/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql new file mode 100644 index 00000000000..db584df0d8a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql @@ -0,0 +1,212 @@ +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES +(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null) +AS aggr(k, v); + +CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES +('Diane Murphy','Accounting',8435), +('Mary Patterson','Accounting',9998), +('Jeff Firrelli','Accounting',8992), +('William Patterson','Accounting',8870), +('Gerard Bondur','Accounting',11472), +('Anthony Bow','Accounting',6627), +('Leslie Jennings','IT',8113), +('Leslie Thompson','IT',5186), +('Julie Firrelli','Sales',9181), +('Steve Patterson','Sales',9441), +('Foon Yue Tseng','Sales',6660), +('George Vanauf','Sales',10563), +('Loui Bondur','SCM',10449), +('Gerard Hernandez','SCM',6949), +('Pamela Castillo','SCM',11303), +('Larry Bott','SCM',11798), +('Barry Jones','SCM',10586) +AS basic_pays(employee_name, department, salary); + +SELECT + percentile_cont(0.25) WITHIN GROUP (ORDER BY v), + percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr; + +SELECT + k, + percentile_cont(0.25) WITHIN GROUP (ORDER BY v), + percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +GROUP BY k +ORDER BY k; + +SELECT + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr; + +SELECT + k, + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +GROUP BY k +ORDER BY k; + +SELECT + median(v), + percentile(v, 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY v) +FROM aggr; + +SELECT + k, + median(v), + percentile(v, 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY v) +FROM aggr +GROUP BY k +ORDER BY k; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + median(salary) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + median(salary) OVER w, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WHERE salary > 8900 +WINDOW w AS (PARTITION BY department) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + median(salary) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + median(salary) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary; diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 66c6c7ba172..8f8963c5558 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -465,115 +465,3 @@ SELECT SUM(salary) OVER w sum_salary FROM basic_pays; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department) -FROM basic_pays -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) -FROM basic_pays -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) -FROM basic_pays -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -FROM basic_pays -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -FROM basic_pays -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department) -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w, - percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WHERE salary > 8900 -WINDOW w AS (PARTITION BY department) -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ORDER BY salary) -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ORDER BY salary) -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -ORDER BY salary; - -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -ORDER BY salary; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 5c87f754dbf..50f933dfc44 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 88 +-- Number of queries: 83 -- !query @@ -12,16 +12,6 @@ struct<> --- !query -CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES -(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null) -AS aggr(k, v) --- !query schema -struct<> --- !query output - - - -- !query SELECT a, COUNT(b) FROM testData -- !query schema @@ -858,61 +848,3 @@ struct<a:int,collect_list(b):array<int>,collect_list(b):array<int>> -- !query output 1 [4,4] [4,4] 2 [3,4] [3,4] - - --- !query -SELECT - percentile_cont(0.25) WITHIN GROUP (ORDER BY v), - percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr --- !query schema -struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double> --- !query output -10.0 30.0 - - --- !query -SELECT - k, - percentile_cont(0.25) WITHIN GROUP (ORDER BY v), - percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr -GROUP BY k -ORDER BY k --- !query schema -struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double> --- !query output -0 10.0 30.0 -1 12.5 17.5 -2 17.5 26.25 -3 60.0 60.0 -4 NULL NULL - - --- !query -SELECT - percentile_disc(0.25) WITHIN GROUP (ORDER BY v), - percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr --- !query schema -struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double> --- !query output -10.0 30.0 - - --- !query -SELECT - k, - percentile_disc(0.25) WITHIN GROUP (ORDER BY v), - percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr -GROUP BY k -ORDER BY k --- !query schema -struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double> --- !query output -0 10.0 30.0 -1 10.0 20.0 -2 10.0 30.0 -3 60.0 60.0 -4 NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out new file mode 100644 index 00000000000..65acc59b9cb --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out @@ -0,0 +1,417 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 23 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES +(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null) +AS aggr(k, v) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES +('Diane Murphy','Accounting',8435), +('Mary Patterson','Accounting',9998), +('Jeff Firrelli','Accounting',8992), +('William Patterson','Accounting',8870), +('Gerard Bondur','Accounting',11472), +('Anthony Bow','Accounting',6627), +('Leslie Jennings','IT',8113), +('Leslie Thompson','IT',5186), +('Julie Firrelli','Sales',9181), +('Steve Patterson','Sales',9441), +('Foon Yue Tseng','Sales',6660), +('George Vanauf','Sales',10563), +('Loui Bondur','SCM',10449), +('Gerard Hernandez','SCM',6949), +('Pamela Castillo','SCM',11303), +('Larry Bott','SCM',11798), +('Barry Jones','SCM',10586) +AS basic_pays(employee_name, department, salary) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT + percentile_cont(0.25) WITHIN GROUP (ORDER BY v), + percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +-- !query schema +struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double> +-- !query output +10.0 30.0 + + +-- !query +SELECT + k, + percentile_cont(0.25) WITHIN GROUP (ORDER BY v), + percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +GROUP BY k +ORDER BY k +-- !query schema +struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):double> +-- !query output +0 10.0 30.0 +1 12.5 17.5 +2 17.5 26.25 +3 60.0 60.0 +4 NULL NULL + + +-- !query +SELECT + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +-- !query schema +struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double> +-- !query output +10.0 30.0 + + +-- !query +SELECT + k, + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +GROUP BY k +ORDER BY k +-- !query schema +struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):double> +-- !query output +0 10.0 30.0 +1 10.0 20.0 +2 10.0 30.0 +3 60.0 60.0 +4 NULL NULL + + +-- !query +SELECT + median(v), + percentile(v, 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY v) +FROM aggr +-- !query schema +struct<median(v):double,percentile(v, 0.5, 1):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> +-- !query output +20.0 20.0 20.0 + + +-- !query +SELECT + k, + median(v), + percentile(v, 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY v) +FROM aggr +GROUP BY k +ORDER BY k +-- !query schema +struct<k:int,median(v):double,percentile(v, 0.5, 1):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> +-- !query output +0 20.0 20.0 20.0 +1 15.0 15.0 15.0 +2 22.5 22.5 22.5 +3 60.0 60.0 60.0 +4 NULL NULL NULL + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...] +-- !query output +Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 +Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 +Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0 +Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0 +Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0 +Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0 +William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0 +Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0 +Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0 +Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0 +Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0 +Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0 +George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0 +Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0 +Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0 +Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0 +Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0 + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + median(salary) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'median'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'median'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department) +ORDER BY salary +-- !query schema +struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...] +-- !query output +Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 +Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 +Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0 +Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0 +Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0 +Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0 +William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0 +Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0 +Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0 +Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0 +Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0 +Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0 +George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0 +Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0 +Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0 +Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0 +Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0 + + +-- !query +SELECT + employee_name, + department, + salary, + median(salary) OVER w, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WHERE salary > 8900 +WINDOW w AS (PARTITION BY department) +ORDER BY salary +-- !query schema +struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER [...] +-- !query output +Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0 9998.0 +Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0 9441.0 +Steve Patterson Sales 9441 9441.0 9441.0 9441.0 9441.0 9441.0 +Mary Patterson Accounting 9998 9998.0 9998.0 9998.0 9998.0 9998.0 +Loui Bondur SCM 10449 10944.5 10944.5 10586.0 10944.5 11303.0 +George Vanauf Sales 10563 9441.0 9441.0 9441.0 9441.0 9441.0 +Barry Jones SCM 10586 10944.5 10944.5 10586.0 10944.5 11303.0 +Pamela Castillo SCM 11303 10944.5 10944.5 10586.0 10944.5 11303.0 +Gerard Bondur Accounting 11472 9998.0 9998.0 9998.0 9998.0 9998.0 +Larry Bott SCM 11798 10944.5 10944.5 10586.0 10944.5 11303.0 + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + median(salary) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'median'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + median(salary) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'median'. diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 955ee0c5e6f..5d1f7a44f0d 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 66 +-- Number of queries: 55 -- !query @@ -1223,230 +1223,3 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Window specification w is not defined in the WINDOW clause. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department) -FROM basic_pays -ORDER BY salary --- !query schema -struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...] --- !query output -Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 -Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 -Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0 -Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0 -Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0 -Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0 -William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0 -Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0 -Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0 -Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0 -Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0 -Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0 -George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0 -Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0 -Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0 -Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0 -Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0 - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) -FROM basic_pays -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_cont'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) -FROM basic_pays -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_disc'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -FROM basic_pays -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_cont'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -FROM basic_pays -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_disc'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department) -ORDER BY salary --- !query schema -struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...] --- !query output -Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 -Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 -Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0 -Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0 -Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0 -Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0 -William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0 -Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0 -Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0 -Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0 -Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0 -Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0 -George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0 -Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0 -Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0 -Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0 -Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0 - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w, - percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WHERE salary > 8900 -WINDOW w AS (PARTITION BY department) -ORDER BY salary --- !query schema -struct<employee_name:string,department:string,salary:int,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,perc [...] --- !query output -Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0 -Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0 -Steve Patterson Sales 9441 9441.0 9441.0 9441.0 9441.0 -Mary Patterson Accounting 9998 9998.0 9998.0 9998.0 9998.0 -Loui Bondur SCM 10449 10944.5 10586.0 10944.5 11303.0 -George Vanauf Sales 10563 9441.0 9441.0 9441.0 9441.0 -Barry Jones SCM 10586 10944.5 10586.0 10944.5 11303.0 -Pamela Castillo SCM 11303 10944.5 10586.0 10944.5 11303.0 -Gerard Bondur Accounting 11472 9998.0 9998.0 9998.0 9998.0 -Larry Bott SCM 11798 10944.5 10586.0 10944.5 11303.0 - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ORDER BY salary) -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_cont'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ORDER BY salary) -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_disc'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_cont'. - - --- !query -SELECT - employee_name, - department, - salary, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, - percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w -FROM basic_pays -WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) -ORDER BY salary --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -Cannot specify order by or frame for 'percentile_disc'. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org