This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new efc1e8ac8bc [SPARK-39213][SQL] Create ANY_VALUE aggregate function efc1e8ac8bc is described below commit efc1e8ac8bc61872601ac2244629a9d54f8889fb Author: Vitalii Li <vitalii...@databricks.com> AuthorDate: Fri May 20 22:28:18 2022 +0300 [SPARK-39213][SQL] Create ANY_VALUE aggregate function ### What changes were proposed in this pull request? Adding implementation for ANY_VALUE aggregate function. During optimization stage it is rewritten to `First` aggregate function. ### Why are the changes needed? This feature provides feature parity with popular DBs and DWHs ### Does this PR introduce _any_ user-facing change? Yes - introducing new aggregate function `ANY_VALUE`. Respective documentation is updated. ### How was this patch tested? Unit tests Closes #36584 from vli-databricks/SPARK-39213. Authored-by: Vitalii Li <vitalii...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 3 + .../spark/sql/catalyst/analysis/Analyzer.scala | 1 + .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../catalyst/expressions/aggregate/AnyValue.scala | 64 +++ .../spark/sql/catalyst/parser/AstBuilder.scala | 10 +- .../spark/sql/catalyst/SQLKeywordSuite.scala | 2 +- .../expressions/aggregate/FirstLastTestSuite.scala | 4 + .../sql-functions/sql-expression-schema.md | 1 + .../resources/sql-tests/inputs/udf/udf-window.sql | 8 +- .../src/test/resources/sql-tests/inputs/window.sql | 29 +- .../sql-tests/results/udf/udf-window.sql.out | 46 +- .../resources/sql-tests/results/window.sql.out | 574 +++++++++++---------- 14 files changed, 446 insertions(+), 299 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 257f53caef1..bb55cec52f5 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -346,6 +346,7 @@ Below is a list of all the keywords in Spark SQL. |AND|reserved|non-reserved|reserved| |ANTI|non-reserved|strict-non-reserved|non-reserved| |ANY|reserved|non-reserved|reserved| +|ANY_VALUE|non-reserved|non-reserved|non-reserved| |ARCHIVE|non-reserved|non-reserved|non-reserved| |ARRAY|non-reserved|non-reserved|reserved| |AS|reserved|non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index fac87c62de0..1cbd6d24dea 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -95,6 +95,7 @@ ANALYZE: 'ANALYZE'; AND: 'AND'; ANTI: 'ANTI'; ANY: 'ANY'; +ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; ARRAY: 'ARRAY'; AS: 'AS'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index ed57e9062c1..ce37a09d5ba 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -824,6 +824,7 @@ primaryExpression | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first + | ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault @@ -1072,6 +1073,7 @@ ansiNonReserved | ALTER | ANALYZE | ANTI + | ANY_VALUE | ARCHIVE | ARRAY | ASC @@ -1314,6 +1316,7 @@ nonReserved | ANALYZE | AND | ANY + | ANY_VALUE | ARCHIVE | ARRAY | AS diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4dd2081c67f..c5bee6f55fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2245,6 +2245,7 @@ class Analyzer(override val catalogManager: CatalogManager) val aggFunc = agg match { case first: First => first.copy(ignoreNulls = u.ignoreNulls) case last: Last => last.copy(ignoreNulls = u.ignoreNulls) + case any_value: AnyValue => any_value.copy(ignoreNulls = u.ignoreNulls) case _ => throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( agg.prettyName, "IGNORE NULLS") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 5084753d2d4..bc463ee5ae5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -467,6 +467,7 @@ object FunctionRegistry { expression[CovSample]("covar_samp"), expression[First]("first"), expression[First]("first_value", true), + expression[AnyValue]("any_value"), expression[Kurtosis]("kurtosis"), expression[Last]("last"), expression[Last]("last_value", true), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala new file mode 100644 index 00000000000..47559b90e9c --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/AnyValue.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.aggregate + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.trees.UnaryLike +import org.apache.spark.sql.types._ + +/** + * Returns the first value of `child` for a group of rows. If the first value of `child` + * is `null`, it returns `null` (respecting nulls). Even if [[AnyValue]] is used on an already + * sorted column, if we do partial aggregation and final aggregation (when mergeExpression + * is used) its result will not be deterministic (unless the input table is sorted and has + * a single partition, and we use a single reducer to do the aggregation.). + * Interchangeable with [[First]]. + */ +@ExpressionDescription( + usage = """ + _FUNC_(expr[, isIgnoreNull]) - Returns some value of `expr` for a group of rows. + If `isIgnoreNull` is true, returns only non-null values.""", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (10), (5), (20) AS tab(col); + 10 + > SELECT _FUNC_(col) FROM VALUES (NULL), (5), (20) AS tab(col); + NULL + > SELECT _FUNC_(col, true) FROM VALUES (NULL), (5), (20) AS tab(col); + 5 + """, + note = """ + The function is non-deterministic. + """, + group = "agg_funcs", + since = "3.4.0") +case class AnyValue(child: Expression, ignoreNulls: Boolean) + extends AggregateFunction with ExpectsInputTypes with RuntimeReplaceableAggregate + with UnaryLike[Expression] { + override lazy val replacement: Expression = First(child, ignoreNulls) + + def this(child: Expression) = this(child, false) + + def this(child: Expression, ignoreNullsExpr: Expression) = { + this(child, FirstLast.validateIgnoreNullExpr(ignoreNullsExpr, "any_value")) + } + + override protected def withNewChildInternal(newChild: Expression): AnyValue = + copy(child = newChild) + override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, BooleanType) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ff3b99fb815..e8d48677671 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableId import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last, PercentileCont, PercentileDisc} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AnyValue, First, Last, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -1818,6 +1818,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit First(expression(ctx.expression), ignoreNullsExpr).toAggregateExpression() } + /** + * Create an [[AnyValue]] expression. + */ + override def visitAny_value(ctx: Any_valueContext): Expression = withOrigin(ctx) { + val ignoreNullsExpr = ctx.IGNORE != null + AnyValue(expression(ctx.expression), ignoreNullsExpr).toAggregateExpression() + } + /** * Create a [[Last]] expression. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala index 0c1c9d5bfee..5957adcd75e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala @@ -111,7 +111,7 @@ trait SQLKeywordUtils extends SparkFunSuite with SQLHelper { keywords.toMap } - // All the SQL keywords defined in `SqlBase.g4` + // All the SQL keywords defined in `SqlBaseLexer.g4` val allCandidateKeywords: Set[String] = { val kwDef = """([A-Z_]+):.+;""".r parseAntlrGrammars( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/FirstLastTestSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/FirstLastTestSuite.scala index bb6672e1046..292edc71553 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/FirstLastTestSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/FirstLastTestSuite.scala @@ -117,5 +117,9 @@ class FirstLastTestSuite extends SparkFunSuite { new Last(input, Literal(1, IntegerType)) }.getMessage assert(msg2.contains("The second argument in last should be a boolean literal")) + val msg3 = intercept[AnalysisException] { + new AnyValue(input, Literal(1, IntegerType)) + }.getMessage + assert(msg3.contains("The second argument in any_value should be a boolean literal")) } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index ca31d169692..be572b0cfd5 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -336,6 +336,7 @@ | org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct<xxhash64(Spark, array(123), 2):bigint> | | org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct<year(2016-07-30):int> | | org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct<zip_with(array(1, 2, 3), array(a, b, c), lambdafunction(named_struct(y, namedlambdavariable(), x, namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array<struct<y:string,x:int>>> | +| org.apache.spark.sql.catalyst.expressions.aggregate.AnyValue | any_value | SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<anyvalue(col):int> | | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | approx_percentile | SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct<approx_percentile(col, array(0.5, 0.4, 0.1), 100):array<int>> | | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | percentile_approx | SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col) | struct<percentile_approx(col, array(0.5, 0.4, 0.1), 100):array<int>> | | org.apache.spark.sql.catalyst.expressions.aggregate.Average | avg | SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<avg(col):double> | diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-window.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-window.sql index 1659f1c8195..6cf89fbdb40 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-window.sql @@ -66,6 +66,9 @@ stddev(udf(val)) OVER w AS stddev, first_value(udf(val)) OVER w AS first_value, first_value(udf(val), true) OVER w AS first_value_ignore_null, first_value(udf(val), false) OVER w AS first_value_contain_null, +any_value(udf(val)) OVER w AS any_value, +any_value(udf(val), true) OVER w AS any_value_ignore_null, +any_value(udf(val), false) OVER w AS any_value_contain_null, last_value(udf(val)) OVER w AS last_value, last_value(udf(val), true) OVER w AS last_value_ignore_null, last_value(udf(val), false) OVER w AS last_value_contain_null, @@ -99,11 +102,14 @@ SELECT udf(val), cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER -- Over clause is empty SELECT udf(val), cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val; --- first_value()/last_value() over () +-- first_value()/last_value()/any_value() over () SELECT udf(val), cate, first_value(false) OVER w AS first_value, first_value(true, true) OVER w AS first_value_ignore_null, first_value(false, false) OVER w AS first_value_contain_null, +any_value(false) OVER w AS any_value, +any_value(true, true) OVER w AS any_value_ignore_null, +any_value(false, false) OVER w AS any_value_contain_null, last_value(false) OVER w AS last_value, last_value(true, true) OVER w AS last_value_ignore_null, last_value(false, false) OVER w AS last_value_contain_null diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index e982683250c..66c6c7ba172 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -123,6 +123,9 @@ stddev(val) OVER w AS stddev, first_value(val) OVER w AS first_value, first_value(val, true) OVER w AS first_value_ignore_null, first_value(val, false) OVER w AS first_value_contain_null, +any_value(val) OVER w AS any_value, +any_value(val, true) OVER w AS any_value_ignore_null, +any_value(val, false) OVER w AS any_value_contain_null, last_value(val) OVER w AS last_value, last_value(val, true) OVER w AS last_value_ignore_null, last_value(val, false) OVER w AS last_value_contain_null, @@ -156,11 +159,14 @@ SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY ca -- Over clause is empty SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val; --- first_value()/last_value() over () +-- first_value()/last_value()/any_value() over () SELECT val, cate, first_value(false) OVER w AS first_value, first_value(true, true) OVER w AS first_value_ignore_null, first_value(false, false) OVER w AS first_value_contain_null, +any_value(false) OVER w AS any_value, +any_value(true, true) OVER w AS any_value_ignore_null, +any_value(false, false) OVER w AS any_value_contain_null, last_value(false) OVER w AS last_value, last_value(true, true) OVER w AS last_value_ignore_null, last_value(false, false) OVER w AS last_value_contain_null @@ -179,11 +185,12 @@ SELECT val, cate, count(val) FILTER (WHERE val > 1) OVER(PARTITION BY cate) FROM testData ORDER BY cate, val; --- nth_value()/first_value() over () +-- nth_value()/first_value()/any_value() over () SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -194,6 +201,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -204,6 +212,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -214,6 +223,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -224,6 +234,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -234,6 +245,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -244,6 +256,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -254,6 +267,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -264,6 +278,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -289,6 +304,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -313,6 +329,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -327,6 +344,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -341,6 +359,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -355,6 +374,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -369,6 +389,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -383,6 +404,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -397,6 +419,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -411,6 +434,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null @@ -425,6 +449,7 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out index 5e359e38379..aa25ddb6e7c 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out @@ -262,6 +262,9 @@ stddev(udf(val)) OVER w AS stddev, first_value(udf(val)) OVER w AS first_value, first_value(udf(val), true) OVER w AS first_value_ignore_null, first_value(udf(val), false) OVER w AS first_value_contain_null, +any_value(udf(val)) OVER w AS any_value, +any_value(udf(val), true) OVER w AS any_value_ignore_null, +any_value(udf(val), false) OVER w AS any_value_contain_null, last_value(udf(val)) OVER w AS last_value, last_value(udf(val), true) OVER w AS last_value_ignore_null, last_value(udf(val), false) OVER w AS last_value_contain_null, @@ -286,17 +289,17 @@ FROM testData WINDOW w AS (PARTITION BY udf(cate) ORDER BY udf(val)) ORDER BY cate, udf(val) -- !query schema -struct<udf(val):int,cate:string,max:int,min:int,min:int,count:bigint,sum:bigint,avg:double,stddev:double,first_value:int,first_value_ignore_null:int,first_value_contain_null:int,last_value:int,last_value_ignore_null:int,last_value_contain_null:int,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,approx_count_distinct:bigint,covar_pop:double,corr:double,stddev_samp:double,stddev_pop:double,collect_list:array<int>,collect_ [...] +struct<udf(val):int,cate:string,max:int,min:int,min:int,count:bigint,sum:bigint,avg:double,stddev:double,first_value:int,first_value_ignore_null:int,first_value_contain_null:int,any_value:int,any_value_ignore_null:int,any_value_contain_null:int,last_value:int,last_value_ignore_null:int,last_value_contain_null:int,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,approx_count_distinct:bigint,covar_pop:double,corr:double,st [...] -- !query output -NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL -NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 -1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 -2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 -1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL -2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 -3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 +NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL +3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL +NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL +1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 +1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 +2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 +1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL +2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 +3 b 3 1 1 3 6 2.0 1.0 1 1 1 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 -- !query @@ -345,6 +348,9 @@ SELECT udf(val), cate, first_value(false) OVER w AS first_value, first_value(true, true) OVER w AS first_value_ignore_null, first_value(false, false) OVER w AS first_value_contain_null, +any_value(false) OVER w AS any_value, +any_value(true, true) OVER w AS any_value_ignore_null, +any_value(false, false) OVER w AS any_value_contain_null, last_value(false) OVER w AS last_value, last_value(true, true) OVER w AS last_value_ignore_null, last_value(false, false) OVER w AS last_value_contain_null @@ -352,17 +358,17 @@ FROM testData WINDOW w AS () ORDER BY cate, val -- !query schema -struct<udf(val):int,cate:string,first_value:boolean,first_value_ignore_null:boolean,first_value_contain_null:boolean,last_value:boolean,last_value_ignore_null:boolean,last_value_contain_null:boolean> +struct<udf(val):int,cate:string,first_value:boolean,first_value_ignore_null:boolean,first_value_contain_null:boolean,any_value:boolean,any_value_ignore_null:boolean,any_value_contain_null:boolean,last_value:boolean,last_value_ignore_null:boolean,last_value_contain_null:boolean> -- !query output -NULL NULL false true false false true false -3 NULL false true false false true false -NULL a false true false false true false -1 a false true false false true false -1 a false true false false true false -2 a false true false false true false -1 b false true false false true false -2 b false true false false true false -3 b false true false false true false +NULL NULL false true false false true false false true false +3 NULL false true false false true false false true false +NULL a false true false false true false false true false +1 a false true false false true false false true false +1 a false true false false true false false true false +2 a false true false false true false false true false +1 b false true false false true false false true false +2 b false true false false true false false true false +3 b false true false false true false false true false -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index fb88ab7c41b..955ee0c5e6f 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -425,6 +425,9 @@ stddev(val) OVER w AS stddev, first_value(val) OVER w AS first_value, first_value(val, true) OVER w AS first_value_ignore_null, first_value(val, false) OVER w AS first_value_contain_null, +any_value(val) OVER w AS any_value, +any_value(val, true) OVER w AS any_value_ignore_null, +any_value(val, false) OVER w AS any_value_contain_null, last_value(val) OVER w AS last_value, last_value(val, true) OVER w AS last_value_ignore_null, last_value(val, false) OVER w AS last_value_contain_null, @@ -449,17 +452,17 @@ FROM testData WINDOW w AS (PARTITION BY cate ORDER BY val) ORDER BY cate, val -- !query schema -struct<val:int,cate:string,max:int,min:int,min:int,count:bigint,sum:bigint,avg:double,stddev:double,first_value:int,first_value_ignore_null:int,first_value_contain_null:int,last_value:int,last_value_ignore_null:int,last_value_contain_null:int,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,approx_count_distinct:bigint,covar_pop:double,corr:double,stddev_samp:double,stddev_pop:double,collect_list:array<int>,collect_set:a [...] +struct<val:int,cate:string,max:int,min:int,min:int,count:bigint,sum:bigint,avg:double,stddev:double,first_value:int,first_value_ignore_null:int,first_value_contain_null:int,any_value:int,any_value_ignore_null:int,any_value_contain_null:int,last_value:int,last_value_ignore_null:int,last_value_contain_null:int,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,approx_count_distinct:bigint,covar_pop:double,corr:double,stddev_ [...] -- !query output -NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL -NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 -1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 -2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 -1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL -2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 -3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 +NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL +3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL +NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL +1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 +1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 +2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 +1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL +2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 +3 b 3 1 1 3 6 2.0 1.0 1 1 1 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 -- !query @@ -508,6 +511,9 @@ SELECT val, cate, first_value(false) OVER w AS first_value, first_value(true, true) OVER w AS first_value_ignore_null, first_value(false, false) OVER w AS first_value_contain_null, +any_value(false) OVER w AS any_value, +any_value(true, true) OVER w AS any_value_ignore_null, +any_value(false, false) OVER w AS any_value_contain_null, last_value(false) OVER w AS last_value, last_value(true, true) OVER w AS last_value_ignore_null, last_value(false, false) OVER w AS last_value_contain_null @@ -515,17 +521,17 @@ FROM testData WINDOW w AS () ORDER BY cate, val -- !query schema -struct<val:int,cate:string,first_value:boolean,first_value_ignore_null:boolean,first_value_contain_null:boolean,last_value:boolean,last_value_ignore_null:boolean,last_value_contain_null:boolean> +struct<val:int,cate:string,first_value:boolean,first_value_ignore_null:boolean,first_value_contain_null:boolean,any_value:boolean,any_value_ignore_null:boolean,any_value_contain_null:boolean,last_value:boolean,last_value_ignore_null:boolean,last_value_contain_null:boolean> -- !query output -NULL NULL false true false false true false -3 NULL false true false false true false -NULL a false true false false true false -1 a false true false false true false -1 a false true false false true false -2 a false true false false true false -1 b false true false false true false -2 b false true false false true false -3 b false true false false true false +NULL NULL false true false false true false false true false +3 NULL false true false false true false false true false +NULL a false true false false true false false true false +1 a false true false false true false false true false +1 a false true false false true false false true false +2 a false true false false true false false true false +1 b false true false false true false false true false +2 b false true false false true false false true false +3 b false true false false true false false true false -- !query @@ -561,31 +567,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott NULL -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Larry Bott Gerard Bondur -George Vanauf 10563 Larry Bott Gerard Bondur -Loui Bondur 10449 Larry Bott Gerard Bondur -Mary Patterson 9998 Larry Bott Gerard Bondur -Steve Patterson 9441 Larry Bott Gerard Bondur -Julie Firrelli 9181 Larry Bott Gerard Bondur -Jeff Firrelli 8992 Larry Bott Gerard Bondur -William Patterson 8870 Larry Bott Gerard Bondur -Diane Murphy 8435 Larry Bott Gerard Bondur -Leslie Jennings 8113 Larry Bott Gerard Bondur -Gerard Hernandez 6949 Larry Bott Gerard Bondur -Foon Yue Tseng 6660 Larry Bott Gerard Bondur -Anthony Bow 6627 Larry Bott Gerard Bondur -Leslie Thompson 5186 Larry Bott Gerard Bondur +Larry Bott 11798 Larry Bott Larry Bott NULL +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Larry Bott Gerard Bondur -- !query @@ -593,31 +600,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott NULL -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Larry Bott Gerard Bondur -George Vanauf 10563 Larry Bott Gerard Bondur -Loui Bondur 10449 Larry Bott Gerard Bondur -Mary Patterson 9998 Larry Bott Gerard Bondur -Steve Patterson 9441 Larry Bott Gerard Bondur -Julie Firrelli 9181 Larry Bott Gerard Bondur -Jeff Firrelli 8992 Larry Bott Gerard Bondur -William Patterson 8870 Larry Bott Gerard Bondur -Diane Murphy 8435 Larry Bott Gerard Bondur -Leslie Jennings 8113 Larry Bott Gerard Bondur -Gerard Hernandez 6949 Larry Bott Gerard Bondur -Foon Yue Tseng 6660 Larry Bott Gerard Bondur -Anthony Bow 6627 Larry Bott Gerard Bondur -Leslie Thompson 5186 Larry Bott Gerard Bondur +Larry Bott 11798 Larry Bott Larry Bott NULL +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Larry Bott Gerard Bondur -- !query @@ -625,31 +633,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott NULL -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Larry Bott Gerard Bondur -George Vanauf 10563 Larry Bott Gerard Bondur -Loui Bondur 10449 Larry Bott Gerard Bondur -Mary Patterson 9998 Larry Bott Gerard Bondur -Steve Patterson 9441 Larry Bott Gerard Bondur -Julie Firrelli 9181 Larry Bott Gerard Bondur -Jeff Firrelli 8992 Larry Bott Gerard Bondur -William Patterson 8870 Larry Bott Gerard Bondur -Diane Murphy 8435 Larry Bott Gerard Bondur -Leslie Jennings 8113 Larry Bott Gerard Bondur -Gerard Hernandez 6949 Larry Bott Gerard Bondur -Foon Yue Tseng 6660 Larry Bott Gerard Bondur -Anthony Bow 6627 Larry Bott Gerard Bondur -Leslie Thompson 5186 Larry Bott Gerard Bondur +Larry Bott 11798 Larry Bott Larry Bott NULL +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Larry Bott Gerard Bondur -- !query @@ -657,31 +666,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) ORDER BY salary -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Leslie Thompson 5186 Leslie Thompson NULL -Anthony Bow 6627 Leslie Thompson Anthony Bow -Foon Yue Tseng 6660 Leslie Thompson Anthony Bow -Gerard Hernandez 6949 Leslie Thompson Anthony Bow -Leslie Jennings 8113 Anthony Bow Foon Yue Tseng -Diane Murphy 8435 Anthony Bow Foon Yue Tseng -William Patterson 8870 Gerard Hernandez Leslie Jennings -Jeff Firrelli 8992 Leslie Jennings Diane Murphy -Julie Firrelli 9181 Leslie Jennings Diane Murphy -Steve Patterson 9441 Leslie Jennings Diane Murphy -Mary Patterson 9998 Leslie Jennings Diane Murphy -Loui Bondur 10449 William Patterson Jeff Firrelli -George Vanauf 10563 William Patterson Jeff Firrelli -Barry Jones 10586 William Patterson Jeff Firrelli -Pamela Castillo 11303 Steve Patterson Mary Patterson -Gerard Bondur 11472 Mary Patterson Loui Bondur -Larry Bott 11798 Mary Patterson Loui Bondur +Leslie Thompson 5186 Leslie Thompson Leslie Thompson NULL +Anthony Bow 6627 Leslie Thompson Leslie Thompson Anthony Bow +Foon Yue Tseng 6660 Leslie Thompson Leslie Thompson Anthony Bow +Gerard Hernandez 6949 Leslie Thompson Leslie Thompson Anthony Bow +Leslie Jennings 8113 Anthony Bow Anthony Bow Foon Yue Tseng +Diane Murphy 8435 Anthony Bow Anthony Bow Foon Yue Tseng +William Patterson 8870 Gerard Hernandez Gerard Hernandez Leslie Jennings +Jeff Firrelli 8992 Leslie Jennings Leslie Jennings Diane Murphy +Julie Firrelli 9181 Leslie Jennings Leslie Jennings Diane Murphy +Steve Patterson 9441 Leslie Jennings Leslie Jennings Diane Murphy +Mary Patterson 9998 Leslie Jennings Leslie Jennings Diane Murphy +Loui Bondur 10449 William Patterson William Patterson Jeff Firrelli +George Vanauf 10563 William Patterson William Patterson Jeff Firrelli +Barry Jones 10586 William Patterson William Patterson Jeff Firrelli +Pamela Castillo 11303 Steve Patterson Steve Patterson Mary Patterson +Gerard Bondur 11472 Mary Patterson Mary Patterson Loui Bondur +Larry Bott 11798 Mary Patterson Mary Patterson Loui Bondur -- !query @@ -689,31 +699,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott Gerard Bondur -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Gerard Bondur Pamela Castillo -George Vanauf 10563 Pamela Castillo Barry Jones -Loui Bondur 10449 Barry Jones George Vanauf -Mary Patterson 9998 George Vanauf Loui Bondur -Steve Patterson 9441 Loui Bondur Mary Patterson -Julie Firrelli 9181 Mary Patterson Steve Patterson -Jeff Firrelli 8992 Steve Patterson Julie Firrelli -William Patterson 8870 Julie Firrelli Jeff Firrelli -Diane Murphy 8435 Jeff Firrelli William Patterson -Leslie Jennings 8113 William Patterson Diane Murphy -Gerard Hernandez 6949 Diane Murphy Leslie Jennings -Foon Yue Tseng 6660 Leslie Jennings Gerard Hernandez -Anthony Bow 6627 Gerard Hernandez Foon Yue Tseng -Leslie Thompson 5186 Foon Yue Tseng Anthony Bow +Larry Bott 11798 Larry Bott Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Gerard Bondur Gerard Bondur Pamela Castillo +George Vanauf 10563 Pamela Castillo Pamela Castillo Barry Jones +Loui Bondur 10449 Barry Jones Barry Jones George Vanauf +Mary Patterson 9998 George Vanauf George Vanauf Loui Bondur +Steve Patterson 9441 Loui Bondur Loui Bondur Mary Patterson +Julie Firrelli 9181 Mary Patterson Mary Patterson Steve Patterson +Jeff Firrelli 8992 Steve Patterson Steve Patterson Julie Firrelli +William Patterson 8870 Julie Firrelli Julie Firrelli Jeff Firrelli +Diane Murphy 8435 Jeff Firrelli Jeff Firrelli William Patterson +Leslie Jennings 8113 William Patterson William Patterson Diane Murphy +Gerard Hernandez 6949 Diane Murphy Diane Murphy Leslie Jennings +Foon Yue Tseng 6660 Leslie Jennings Leslie Jennings Gerard Hernandez +Anthony Bow 6627 Gerard Hernandez Gerard Hernandez Foon Yue Tseng +Leslie Thompson 5186 Foon Yue Tseng Foon Yue Tseng Anthony Bow -- !query @@ -721,31 +732,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott Gerard Bondur -Gerard Bondur 11472 Gerard Bondur Pamela Castillo -Pamela Castillo 11303 Pamela Castillo Barry Jones -Barry Jones 10586 Barry Jones George Vanauf -George Vanauf 10563 George Vanauf Loui Bondur -Loui Bondur 10449 Loui Bondur Mary Patterson -Mary Patterson 9998 Mary Patterson Steve Patterson -Steve Patterson 9441 Steve Patterson Julie Firrelli -Julie Firrelli 9181 Julie Firrelli Jeff Firrelli -Jeff Firrelli 8992 Jeff Firrelli William Patterson -William Patterson 8870 William Patterson Diane Murphy -Diane Murphy 8435 Diane Murphy Leslie Jennings -Leslie Jennings 8113 Leslie Jennings Gerard Hernandez -Gerard Hernandez 6949 Gerard Hernandez Foon Yue Tseng -Foon Yue Tseng 6660 Foon Yue Tseng Anthony Bow -Anthony Bow 6627 Anthony Bow Leslie Thompson -Leslie Thompson 5186 Leslie Thompson NULL +Larry Bott 11798 Larry Bott Larry Bott Gerard Bondur +Gerard Bondur 11472 Gerard Bondur Gerard Bondur Pamela Castillo +Pamela Castillo 11303 Pamela Castillo Pamela Castillo Barry Jones +Barry Jones 10586 Barry Jones Barry Jones George Vanauf +George Vanauf 10563 George Vanauf George Vanauf Loui Bondur +Loui Bondur 10449 Loui Bondur Loui Bondur Mary Patterson +Mary Patterson 9998 Mary Patterson Mary Patterson Steve Patterson +Steve Patterson 9441 Steve Patterson Steve Patterson Julie Firrelli +Julie Firrelli 9181 Julie Firrelli Julie Firrelli Jeff Firrelli +Jeff Firrelli 8992 Jeff Firrelli Jeff Firrelli William Patterson +William Patterson 8870 William Patterson William Patterson Diane Murphy +Diane Murphy 8435 Diane Murphy Diane Murphy Leslie Jennings +Leslie Jennings 8113 Leslie Jennings Leslie Jennings Gerard Hernandez +Gerard Hernandez 6949 Gerard Hernandez Gerard Hernandez Foon Yue Tseng +Foon Yue Tseng 6660 Foon Yue Tseng Foon Yue Tseng Anthony Bow +Anthony Bow 6627 Anthony Bow Anthony Bow Leslie Thompson +Leslie Thompson 5186 Leslie Thompson Leslie Thompson NULL -- !query @@ -753,31 +765,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott Gerard Bondur -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Larry Bott Gerard Bondur -George Vanauf 10563 Larry Bott Gerard Bondur -Loui Bondur 10449 Larry Bott Gerard Bondur -Mary Patterson 9998 Larry Bott Gerard Bondur -Steve Patterson 9441 Larry Bott Gerard Bondur -Julie Firrelli 9181 Larry Bott Gerard Bondur -Jeff Firrelli 8992 Larry Bott Gerard Bondur -William Patterson 8870 Larry Bott Gerard Bondur -Diane Murphy 8435 Larry Bott Gerard Bondur -Leslie Jennings 8113 Larry Bott Gerard Bondur -Gerard Hernandez 6949 Larry Bott Gerard Bondur -Foon Yue Tseng 6660 Larry Bott Gerard Bondur -Anthony Bow 6627 Larry Bott Gerard Bondur -Leslie Thompson 5186 Larry Bott Gerard Bondur +Larry Bott 11798 Larry Bott Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Larry Bott Gerard Bondur -- !query @@ -785,31 +798,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott Gerard Bondur -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Larry Bott Gerard Bondur -George Vanauf 10563 Larry Bott Gerard Bondur -Loui Bondur 10449 Larry Bott Gerard Bondur -Mary Patterson 9998 Larry Bott Gerard Bondur -Steve Patterson 9441 Larry Bott Gerard Bondur -Julie Firrelli 9181 Larry Bott Gerard Bondur -Jeff Firrelli 8992 Larry Bott Gerard Bondur -William Patterson 8870 Larry Bott Gerard Bondur -Diane Murphy 8435 Larry Bott Gerard Bondur -Leslie Jennings 8113 Larry Bott Gerard Bondur -Gerard Hernandez 6949 Larry Bott Gerard Bondur -Foon Yue Tseng 6660 Larry Bott Gerard Bondur -Anthony Bow 6627 Larry Bott Gerard Bondur -Leslie Thompson 5186 Larry Bott Gerard Bondur +Larry Bott 11798 Larry Bott Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Larry Bott Gerard Bondur -- !query @@ -817,31 +831,32 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) ORDER BY salary DESC -- !query schema -struct<employee_name:string,salary:int,highest_salary:string,second_highest_salary:string> +struct<employee_name:string,salary:int,highest_salary:string,highest_salary:string,second_highest_salary:string> -- !query output -Larry Bott 11798 Larry Bott Gerard Bondur -Gerard Bondur 11472 Larry Bott Gerard Bondur -Pamela Castillo 11303 Larry Bott Gerard Bondur -Barry Jones 10586 Larry Bott Gerard Bondur -George Vanauf 10563 Larry Bott Gerard Bondur -Loui Bondur 10449 Larry Bott Gerard Bondur -Mary Patterson 9998 Larry Bott Gerard Bondur -Steve Patterson 9441 Larry Bott Gerard Bondur -Julie Firrelli 9181 Larry Bott Gerard Bondur -Jeff Firrelli 8992 Larry Bott Gerard Bondur -William Patterson 8870 Larry Bott Gerard Bondur -Diane Murphy 8435 Larry Bott Gerard Bondur -Leslie Jennings 8113 Larry Bott Gerard Bondur -Gerard Hernandez 6949 Larry Bott Gerard Bondur -Foon Yue Tseng 6660 Larry Bott Gerard Bondur -Anthony Bow 6627 Larry Bott Gerard Bondur -Leslie Thompson 5186 Larry Bott Gerard Bondur +Larry Bott 11798 Larry Bott Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Larry Bott Gerard Bondur -- !query @@ -886,6 +901,7 @@ SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -898,13 +914,14 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -[INVALID_SQL_SYNTAX] Invalid SQL syntax: The definition of window `w` is repetitive.(line 8, pos 0) +[INVALID_SQL_SYNTAX] Invalid SQL syntax: The definition of window `w` is repetitive.(line 9, pos 0) == SQL == SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, + any_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays @@ -932,23 +949,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL -a 1 x x y z v x NULL NULL NULL x NULL NULL x x -b 2 NULL NULL y z v NULL x NULL NULL x NULL NULL x x -c 3 NULL NULL y z v NULL x NULL NULL x NULL NULL x x -a 4 y y z v NULL y x NULL NULL x y NULL x y -b 5 NULL NULL z v NULL NULL y x NULL x y NULL x y -a 6 z z v NULL NULL z y x NULL x y z x z -a 7 v v NULL NULL NULL v z y x x y z x v -a 8 NULL NULL NULL NULL NULL NULL v z y x y z x v +a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +a 1 x x y z v x NULL NULL NULL x NULL NULL x x x +b 2 NULL NULL y z v NULL x NULL NULL x NULL NULL x x x +c 3 NULL NULL y z v NULL x NULL NULL x NULL NULL x x x +a 4 y y z v NULL y x NULL NULL x y NULL x x y +b 5 NULL NULL z v NULL NULL y x NULL x y NULL x x y +a 6 z z v NULL NULL z y x NULL x y z x x z +a 7 v v NULL NULL NULL v z y x x y z x x v +a 8 NULL NULL NULL NULL NULL NULL v z y x y z x x v -- !query @@ -960,23 +978,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL NULL NULL NULL NULL NULL -a 1 x x NULL NULL x x -b 2 NULL x NULL NULL x x -c 3 NULL x NULL NULL x x -a 4 y x y NULL x y -b 5 NULL x y NULL x y -a 6 z x y z x z -a 7 v x y z x v -a 8 NULL x y z x v +a 0 NULL NULL NULL NULL NULL NULL NULL +a 1 x x NULL NULL x x x +b 2 NULL x NULL NULL x x x +c 3 NULL x NULL NULL x x x +a 4 y x y NULL x x y +b 5 NULL x y NULL x x y +a 6 z x y z x x z +a 7 v x y z x x v +a 8 NULL x y z x x v -- !query @@ -988,23 +1007,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL NULL NULL NULL NULL NULL -a 1 x x NULL NULL x x -b 2 NULL x NULL NULL x x -c 3 NULL x NULL NULL x x -a 4 y x y NULL x y -b 5 NULL x y NULL x y -a 6 z x y z x z -a 7 v x y z x v -a 8 NULL x y z x v +a 0 NULL NULL NULL NULL NULL NULL NULL +a 1 x x NULL NULL x x x +b 2 NULL x NULL NULL x x x +c 3 NULL x NULL NULL x x x +a 4 y x y NULL x x y +b 5 NULL x y NULL x x y +a 6 z x y z x x z +a 7 v x y z x x v +a 8 NULL x y z x x v -- !query @@ -1016,23 +1036,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL x NULL NULL x x -a 1 x x NULL NULL x x -b 2 NULL x y NULL x y -c 3 NULL x y NULL x y -a 4 y y z NULL y z -b 5 NULL y z v y v -a 6 z y z v y v -a 7 v z v NULL z v -a 8 NULL z v NULL z v +a 0 NULL x NULL NULL x x x +a 1 x x NULL NULL x x x +b 2 NULL x y NULL x x y +c 3 NULL x y NULL x x y +a 4 y y z NULL y y z +b 5 NULL y z v y y v +a 6 z y z v y y v +a 7 v z v NULL z z v +a 8 NULL z v NULL z z v -- !query @@ -1044,23 +1065,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL x NULL NULL x x -a 1 x x NULL NULL x x -b 2 NULL x y NULL x y -c 3 NULL x y NULL x y -a 4 y y z NULL y z -b 5 NULL y z v y v -a 6 z y z v y v -a 7 v z v NULL z v -a 8 NULL z v NULL z v +a 0 NULL x NULL NULL x x x +a 1 x x NULL NULL x x x +b 2 NULL x y NULL x x y +c 3 NULL x y NULL x x y +a 4 y y z NULL y y z +b 5 NULL y z v y y v +a 6 z y z v y y v +a 7 v z v NULL z z v +a 8 NULL z v NULL z z v -- !query @@ -1072,23 +1094,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL x y z x v -a 1 x x y z x v -b 2 NULL y z v y v -c 3 NULL y z v y v -a 4 y y z v y v -b 5 NULL z v NULL z v -a 6 z z v NULL z v -a 7 v v NULL NULL v v -a 8 NULL NULL NULL NULL NULL NULL +a 0 NULL x y z x x v +a 1 x x y z x x v +b 2 NULL y z v y y v +c 3 NULL y z v y y v +a 4 y y z v y y v +b 5 NULL z v NULL z z v +a 6 z z v NULL z z v +a 7 v v NULL NULL v v v +a 8 NULL NULL NULL NULL NULL NULL NULL -- !query @@ -1100,23 +1123,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL x y z x v -a 1 x x y z x v -b 2 NULL x y z x v -c 3 NULL x y z x v -a 4 y x y z x v -b 5 NULL x y z x v -a 6 z x y z x v -a 7 v x y z x v -a 8 NULL x y z x v +a 0 NULL x y z x x v +a 1 x x y z x x v +b 2 NULL x y z x x v +c 3 NULL x y z x x v +a 4 y x y z x x v +b 5 NULL x y z x x v +a 6 z x y z x x v +a 7 v x y z x x v +a 8 NULL x y z x x v -- !query @@ -1128,23 +1152,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> -- !query output -a 0 NULL x y z x v -a 1 x x y z x v -b 2 NULL x y z x v -c 3 NULL x y z x v -a 4 y x y z x v -b 5 NULL x y z x v -a 6 z x y z x v -a 7 v x y z x v -a 8 NULL x y z x v +a 0 NULL x y z x x v +a 1 x x y z x x v +b 2 NULL x y z x x v +c 3 NULL x y z x x v +a 4 y x y z x x v +b 5 NULL x y z x x v +a 6 z x y z x x v +a 7 v x y z x x v +a 8 NULL x y z x x v -- !query @@ -1156,23 +1181,24 @@ SELECT nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, first_value(v) IGNORE NULLS OVER w first_value, + any_value(v) IGNORE NULLS OVER w any_value, last_value(v) IGNORE NULLS OVER w last_value FROM test_ignore_null WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) ORDER BY id -- !query schema -struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,last_value:string> --- !query output -a 0 NULL x NULL NULL x x -a 1 x x NULL NULL x x -b 2 NULL x NULL NULL x x -c 3 NULL x y NULL x y -a 4 y x y NULL x y -b 5 NULL x y z x z -a 6 z x y z x v -a 7 v x y z x v -a 8 NULL x y z x v +struct<content:string,id:int,v:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string> +-- !query output +a 0 NULL x NULL NULL x x x +a 1 x x NULL NULL x x x +b 2 NULL x NULL NULL x x x +c 3 NULL x y NULL x x y +a 4 y x y NULL x x y +b 5 NULL x y z x x z +a 6 z x y z x x v +a 7 v x y z x x v +a 8 NULL x y z x x v -- !query --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org