This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6d77aa54d04 [SPARK-41637][SQL] ORDER BY ALL 6d77aa54d04 is described below commit 6d77aa54d04a8f8ee7533b146bc24f5fa0518f7d Author: Reynold Xin <r...@databricks.com> AuthorDate: Fri Dec 23 12:52:36 2022 +0800 [SPARK-41637][SQL] ORDER BY ALL ### What changes were proposed in this pull request? This patch adds ORDER BY ALL support to SQL. ORDER BY ALL is a syntactic sugar to sort the output by all the fields, from left to right. It also allows specifying asc/desc as well as null ordering. ### Why are the changes needed? It's a good convenience sugar added initially by DuckDB to avoid repeating the fields. ### Does this PR introduce _any_ user-facing change? Yes. See above. ### How was this patch tested? Added SQL tests. Closes #39144 from rxin/orderbyall. Authored-by: Reynold Xin <r...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/analysis/Analyzer.scala | 1 + .../sql/catalyst/analysis/ResolveOrderByAll.scala | 81 +++++++++ .../sql/catalyst/rules/RuleIdCollection.scala | 1 + .../resources/sql-tests/inputs/order-by-all.sql | 44 +++++ .../sql-tests/results/order-by-all.sql.out | 202 +++++++++++++++++++++ 5 files changed, 329 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ea3b81722af..ccaf07a4d52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -297,6 +297,7 @@ class Analyzer(override val catalogManager: CatalogManager) ResolveGroupingAnalytics :: ResolvePivot :: ResolveUnpivot :: + ResolveOrderByAll :: ResolveGroupByAll :: ResolveOrdinalInOrderByAndGroupBy :: ResolveAggAliasInGroupBy :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveOrderByAll.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveOrderByAll.scala new file mode 100644 index 00000000000..7cf584dadcf --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveOrderByAll.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions.SortOrder +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Sort} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{SORT, UNRESOLVED_ATTRIBUTE} + +/** + * Resolve "order by all" in the following SQL pattern: + * `select col1, col2 from table order by all`. + * + * It orders the query result by all columns, from left to right. The query above becomes: + * + * `select col1, col2 from table order by col1, col2` + * + * This should also support specifying asc/desc, and nulls first/last. + */ +object ResolveOrderByAll extends Rule[LogicalPlan] { + + val ALL = "ALL" + + /** + * An extractor to pull out the SortOrder field in the ORDER BY ALL clause. We pull out that + * SortOrder object so we can pass its direction and null ordering. + */ + object OrderByAll { + def unapply(s: Sort): Option[SortOrder] = { + // This only applies to global ordering. + if (!s.global) { + return None + } + // Don't do this if we have more than one order field. That means it's not order by all. + if (s.order.size != 1) { + return None + } + // Don't do this if there's a child field called ALL. That should take precedence. + if (s.child.output.exists(_.name.toUpperCase() == ALL)) { + return None + } + + s.order.find { so => + so.child match { + case a: UnresolvedAttribute => a.name.toUpperCase() == ALL + case _ => false + } + } + } + } + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( + _.containsAllPatterns(UNRESOLVED_ATTRIBUTE, SORT), ruleId) { + // This only makes sense if the child is resolved. + case s: Sort if s.child.resolved => + s match { + case OrderByAll(sortOrder) => + // Replace a single order by all with N fields, where N = child's output, while + // retaining the same asc/desc and nulls ordering. + val order = s.child.output.map(a => sortOrder.copy(child = a)) + s.copy(order = order) + case _ => + s + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 32550b8552d..41aa68f0ec6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -91,6 +91,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.analysis.ResolveInlineTables" :: "org.apache.spark.sql.catalyst.analysis.ResolveLambdaVariables" :: "org.apache.spark.sql.catalyst.analysis.ResolveLateralColumnAliasReference" :: + "org.apache.spark.sql.catalyst.analysis.ResolveOrderByAll" :: "org.apache.spark.sql.catalyst.analysis.ResolveTimeZone" :: "org.apache.spark.sql.catalyst.analysis.ResolveUnion" :: "org.apache.spark.sql.catalyst.analysis.SubstituteUnresolvedOrdinals" :: diff --git a/sql/core/src/test/resources/sql-tests/inputs/order-by-all.sql b/sql/core/src/test/resources/sql-tests/inputs/order-by-all.sql new file mode 100644 index 00000000000..37dfb1d3499 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/order-by-all.sql @@ -0,0 +1,44 @@ +create temporary view data as select * from values + (0, 1), + (0, 2), + (1, 3), + (1, NULL) + as data(g, i); + +-- most basic test with only 1 column +select g from data order by all; + +-- two columns +select * from data order by all; + +-- test case insensitive +select * from data order by aLl; + +-- asc/desc +select * from data order by all asc; +select * from data order by all desc; + +-- nulls first / last +select * from data order by all nulls first; +select * from data order by all nulls last; + +-- combining nulls first/last/asc/desc +select * from data order by all asc nulls first; +select * from data order by all desc nulls first; +select * from data order by all asc nulls last; +select * from data order by all desc nulls last; + +-- set operations from duckdb +select * from data union all select * from data order by all; +select * from data union select * from data order by all; + +-- limit +select * from data order by all limit 2; + +-- precedence: if there's a column already named all, reference that, instead of expanding. +-- result should be 1, 2, 3, and not 3, 2, 1 +select * from values("z", 1), ("y", 2), ("x", 3) AS T(col1, all) order by all; + +-- shouldn't work in window functions +select name, dept, rank() over (partition by dept order by all) as rank +from values('Lisa', 'Sales', 10000, 35) as T(name, dept, salary, age); diff --git a/sql/core/src/test/resources/sql-tests/results/order-by-all.sql.out b/sql/core/src/test/resources/sql-tests/results/order-by-all.sql.out new file mode 100644 index 00000000000..7612b3c30d4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/order-by-all.sql.out @@ -0,0 +1,202 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create temporary view data as select * from values + (0, 1), + (0, 2), + (1, 3), + (1, NULL) + as data(g, i) +-- !query schema +struct<> +-- !query output + + + +-- !query +select g from data order by all +-- !query schema +struct<g:int> +-- !query output +0 +0 +1 +1 + + +-- !query +select * from data order by all +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 NULL +1 3 + + +-- !query +select * from data order by aLl +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 NULL +1 3 + + +-- !query +select * from data order by all asc +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 NULL +1 3 + + +-- !query +select * from data order by all desc +-- !query schema +struct<g:int,i:int> +-- !query output +1 3 +1 NULL +0 2 +0 1 + + +-- !query +select * from data order by all nulls first +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 NULL +1 3 + + +-- !query +select * from data order by all nulls last +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 3 +1 NULL + + +-- !query +select * from data order by all asc nulls first +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 NULL +1 3 + + +-- !query +select * from data order by all desc nulls first +-- !query schema +struct<g:int,i:int> +-- !query output +1 NULL +1 3 +0 2 +0 1 + + +-- !query +select * from data order by all asc nulls last +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 3 +1 NULL + + +-- !query +select * from data order by all desc nulls last +-- !query schema +struct<g:int,i:int> +-- !query output +1 3 +1 NULL +0 2 +0 1 + + +-- !query +select * from data union all select * from data order by all +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 1 +0 2 +0 2 +1 NULL +1 NULL +1 3 +1 3 + + +-- !query +select * from data union select * from data order by all +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 +1 NULL +1 3 + + +-- !query +select * from data order by all limit 2 +-- !query schema +struct<g:int,i:int> +-- !query output +0 1 +0 2 + + +-- !query +select * from values("z", 1), ("y", 2), ("x", 3) AS T(col1, all) order by all +-- !query schema +struct<col1:string,all:int> +-- !query output +z 1 +y 2 +x 3 + + +-- !query +select name, dept, rank() over (partition by dept order by all) as rank +from values('Lisa', 'Sales', 10000, 35) as T(name, dept, salary, age) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42000", + "messageParameters" : { + "objectName" : "`all`", + "proposal" : "`T`.`age`, `T`.`name`, `T`.`dept`, `T`.`salary`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 60, + "stopIndex" : 62, + "fragment" : "all" + } ] +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org