This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6604929fe6d [SPARK-37702][SQL] Support ANSI Aggregate Function: regr_syy 6604929fe6d is described below commit 6604929fe6dee7216a24b4d0c96d478cae13e2b9 Author: Jiaan Geng <belie...@163.com> AuthorDate: Thu May 12 13:30:26 2022 +0800 [SPARK-37702][SQL] Support ANSI Aggregate Function: regr_syy ### What changes were proposed in this pull request? This PR used to support ANSI aggregate Function: `regr_syy` The mainstream database supports `regr_syy` show below: **Teradata** https://docs.teradata.com/r/kmuOwjp1zEYg98JsB8fu_A/ZsXbiMrja5EYTft42VoiTQ **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/regr_syy.html **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/REGR_-Linear-Regression-Functions.html#GUID-A675B68F-2A88-4843-BE2C-FCDE9C65F9A9 **DB2** https://www.ibm.com/docs/en/db2/11.5?topic=af-regression-functions-regr-avgx-regr-avgy-regr-count **H2** http://www.h2database.com/html/functions-aggregate.html#regr_syy **Postgresql** https://www.postgresql.org/docs/8.4/functions-aggregate.html **Sybase** https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.help.sqlanywhere.12.0.0/dbreference/regr-syy-function.html **Exasol** https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/regr_function.htm ### Why are the changes needed? `regr_syy` is very useful. ### Does this PR introduce _any_ user-facing change? 'Yes'. New feature. ### How was this patch tested? New tests. Closes #36292 from beliefer/SPARK-37702. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/CentralMomentAgg.scala | 4 +- .../expressions/aggregate/linearRegression.scala | 33 +++++++++++- .../sql-functions/sql-expression-schema.md | 1 + .../test/resources/sql-tests/inputs/group-by.sql | 6 +++ .../inputs/postgreSQL/aggregates_part1.sql | 23 ++++---- .../inputs/udf/postgreSQL/udf-aggregates_part1.sql | 23 ++++---- .../resources/sql-tests/results/group-by.sql.out | 35 +++++++++++- .../results/postgreSQL/aggregates_part1.sql.out | 62 +++++++++++++++++++++- .../udf/postgreSQL/udf-aggregates_part1.sql.out | 62 +++++++++++++++++++++- 10 files changed, 220 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index f7af5b35a3b..a56ef175b5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -503,6 +503,7 @@ object FunctionRegistry { expression[RegrR2]("regr_r2"), expression[RegrSXX]("regr_sxx"), expression[RegrSXY]("regr_sxy"), + expression[RegrSYY]("regr_syy"), // string functions expression[Ascii]("ascii"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala index a40c5e4815f..f9bfe77ce5a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala @@ -264,7 +264,7 @@ case class VarianceSamp( copy(child = newChild) } -case class RegrSXXReplacement(child: Expression) +case class RegrReplacement(child: Expression) extends CentralMomentAgg(child, !SQLConf.get.legacyStatisticalAggregate) { override protected def momentOrder = 2 @@ -273,7 +273,7 @@ case class RegrSXXReplacement(child: Expression) If(n === 0.0, Literal.create(null, DoubleType), m2) } - override protected def withNewChildInternal(newChild: Expression): RegrSXXReplacement = + override protected def withNewChildInternal(newChild: Expression): RegrReplacement = copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala index 568c186f06d..0372f94031e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala @@ -174,7 +174,7 @@ case class RegrSXX( with ImplicitCastInputTypes with BinaryLike[Expression] { override lazy val replacement: Expression = - RegrSXXReplacement(If(Or(IsNull(left), IsNull(right)), Literal.create(null, DoubleType), right)) + RegrReplacement(If(Or(IsNull(left), IsNull(right)), Literal.create(null, DoubleType), right)) override def nodeName: String = "regr_sxx" override def inputTypes: Seq[DoubleType] = Seq(DoubleType, DoubleType) override protected def withNewChildrenInternal( @@ -206,3 +206,34 @@ case class RegrSXY(y: Expression, x: Expression) extends Covariance(y, x, true) newLeft: Expression, newRight: Expression): RegrSXY = this.copy(y = newLeft, x = newRight) } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(y, x) - Returns REGR_COUNT(y, x) * VAR_POP(y) for non-null pairs in a group, where `y` is the dependent variable and `x` is the independent variable.", + examples = """ + Examples: + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); + 0.75 + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x); + 0.6666666666666666 + > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x); + 0.5 + """, + group = "agg_funcs", + since = "3.4.0") +// scalastyle:on line.size.limit +case class RegrSYY( + left: Expression, + right: Expression) + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with BinaryLike[Expression] { + override lazy val replacement: Expression = + RegrReplacement(If(Or(IsNull(left), IsNull(right)), Literal.create(null, DoubleType), left)) + override def nodeName: String = "regr_syy" + override def inputTypes: Seq[DoubleType] = Seq(DoubleType, DoubleType) + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): RegrSYY = + this.copy(left = newLeft, right = newRight) +} diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 0115578e909..618e1e8087c 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -372,6 +372,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.RegrR2 | regr_r2 | SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct<regr_r2(y, x):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.RegrSXX | regr_sxx | SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct<regr_sxx(y, x):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.RegrSXY | regr_sxy | SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct<regr_sxy(y, x):double> | +| org.apache.spark.sql.catalyst.expressions.aggregate.RegrSYY | regr_syy | SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x) | struct<regr_syy(y, x):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<skewness(col):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_pop(col):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | std | SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<std(col):double> | diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index b45538b498d..401ab5a99d2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -282,6 +282,12 @@ SELECT regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; SELECT k, regr_sxy(y, x) FROM testRegression GROUP BY k; SELECT k, regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; +-- SPARK-37702: Support ANSI Aggregate Function: regr_syy +SELECT regr_syy(y, x) FROM testRegression; +SELECT regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; +SELECT k, regr_syy(y, x) FROM testRegression GROUP BY k; +SELECT k, regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; + -- SPARK-37676: Support ANSI Aggregation Function: percentile_cont SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v), diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql index 58def6729c8..5f3d6b7e60c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql @@ -82,7 +82,7 @@ FROM (VALUES (7000000000005), (7000000000007)) v(x); -- SQL2003 binary aggregates [SPARK-23907] SELECT regr_count(b, a) FROM aggtest; SELECT regr_sxx(b, a) FROM aggtest; --- SELECT regr_syy(b, a) FROM aggtest; +SELECT regr_syy(b, a) FROM aggtest; SELECT regr_sxy(b, a) FROM aggtest; SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; SELECT regr_r2(b, a) FROM aggtest; @@ -92,18 +92,17 @@ SELECT corr(b, a) FROM aggtest; -- test accum and combine functions directly [SPARK-23907] --- CREATE TABLE regr_test (x float8, y float8); --- INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test WHERE x IN (10,20,30,80); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test; +CREATE TEMPORARY VIEW regr_test AS SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30,80); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test; -- SELECT float8_accum('{4,140,2900}'::float8[], 100); -- SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test WHERE x IN (10,20,30); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test WHERE x IN (80,100); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (80,100); -- SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]); -- SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]); -- SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]); @@ -113,7 +112,7 @@ SELECT corr(b, a) FROM aggtest; -- '{2,180,200,740,57800,-3400}'::float8[]); -- SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[], -- '{2,180,200,740,57800,-3400}'::float8[]); --- DROP TABLE regr_test; +DROP VIEW regr_test; -- test count, distinct diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql index 525bfa8b0b0..c760da09ded 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql @@ -82,7 +82,7 @@ FROM (VALUES (7000000000005), (7000000000007)) v(x); -- SQL2003 binary aggregates [SPARK-23907] SELECT regr_count(b, a) FROM aggtest; SELECT regr_sxx(b, a) FROM aggtest; --- SELECT regr_syy(b, a) FROM aggtest; +SELECT regr_syy(b, a) FROM aggtest; SELECT regr_sxy(b, a) FROM aggtest; SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; SELECT regr_r2(b, a) FROM aggtest; @@ -92,18 +92,17 @@ SELECT corr(b, udf(a)) FROM aggtest; -- test accum and combine functions directly [SPARK-23907] --- CREATE TABLE regr_test (x float8, y float8); --- INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test WHERE x IN (10,20,30,80); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test; +CREATE TEMPORARY VIEW regr_test AS SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30,80); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test; -- SELECT float8_accum('{4,140,2900}'::float8[], 100); -- SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test WHERE x IN (10,20,30); --- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) --- FROM regr_test WHERE x IN (80,100); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30); +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (80,100); -- SELECT float8_combine('{3,60,200}'::float8[],ELECT CAST(udf(covar_pop(b, udf(a))) AS '{0,0,0}'::float8[]); -- SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]); -- SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]); @@ -113,7 +112,7 @@ SELECT corr(b, udf(a)) FROM aggtest; -- '{2,180,200,740,57800,-3400}'::float8[]); -- SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[], -- '{2,180,200,740,57800,-3400}'::float8[]); --- DROP TABLE regr_test; +DROP VIEW regr_test; -- test count, distinct diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 08b329b6215..64599ef71d6 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 109 +-- Number of queries: 113 -- !query @@ -1038,6 +1038,39 @@ struct<k:int,regr_sxy(y, x):double> 2 240.0 +-- !query +SELECT regr_syy(y, x) FROM testRegression +-- !query schema +struct<regr_syy(y, x):double> +-- !query output +200.0 + + +-- !query +SELECT regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL +-- !query schema +struct<regr_syy(y, x):double> +-- !query output +200.0 + + +-- !query +SELECT k, regr_syy(y, x) FROM testRegression GROUP BY k +-- !query schema +struct<k:int,regr_syy(y, x):double> +-- !query output +1 NULL +2 200.0 + + +-- !query +SELECT k, regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k +-- !query schema +struct<k:int,regr_syy(y, x):double> +-- !query output +2 200.0 + + -- !query SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v), diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index 1ac9705aaf2..6d5b89f0c63 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 49 +-- Number of queries: 56 -- !query @@ -304,6 +304,14 @@ struct<regr_sxx(b, a):double> 5099.0 +-- !query +SELECT regr_syy(b, a) FROM aggtest +-- !query schema +struct<regr_syy(b, a):double> +-- !query output +68756.21569392929 + + -- !query SELECT regr_sxy(b, a) FROM aggtest -- !query schema @@ -344,6 +352,58 @@ struct<corr(b, a):double> 0.1396345165178734 +-- !query +CREATE TEMPORARY VIEW regr_test AS SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30,80) +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +4 140 2900.0 1290 83075.0 15050.0 + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +5 240 6280.0 1490 95080.0 8680.0 + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30) +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +3 60 200.0 750 20000.0 2000.0 + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (80,100) +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +2 180 200.0 740 57800.0 -3400.0 + + +-- !query +DROP VIEW regr_test +-- !query schema +struct<> +-- !query output + + + -- !query SELECT count(four) AS cnt_1000 FROM onek -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index 06ff77cca9a..268a8553cb4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 48 +-- Number of queries: 55 -- !query @@ -295,6 +295,14 @@ struct<regr_sxx(b, a):double> 5099.0 +-- !query +SELECT regr_syy(b, a) FROM aggtest +-- !query schema +struct<regr_syy(b, a):double> +-- !query output +68756.21569392929 + + -- !query SELECT regr_sxy(b, a) FROM aggtest -- !query schema @@ -335,6 +343,58 @@ struct<corr(b, udf(a)):double> 0.1396345165178734 +-- !query +CREATE TEMPORARY VIEW regr_test AS SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30,80) +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +4 140 2900.0 1290 83075.0 15050.0 + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +5 240 6280.0 1490 95080.0 8680.0 + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (10,20,30) +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +3 60 200.0 750 20000.0 2000.0 + + +-- !query +SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) +FROM regr_test WHERE x IN (80,100) +-- !query schema +struct<count(1):bigint,sum(x):bigint,regr_sxx(y, x):double,sum(y):bigint,regr_syy(y, x):double,regr_sxy(y, x):double> +-- !query output +2 180 200.0 740 57800.0 -3400.0 + + +-- !query +DROP VIEW regr_test +-- !query schema +struct<> +-- !query output + + + -- !query SELECT count(udf(four)) AS cnt_1000 FROM onek -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org