This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9353d67f929 [SPARK-43851][SQL] Support LCA in grouping expressions
9353d67f929 is described below

commit 9353d67f9290bae1e7d7e16a2caf5256cc4e2f92
Author: Jia Fan <fanjiaemi...@qq.com>
AuthorDate: Sat Jul 1 08:48:10 2023 +0300

    [SPARK-43851][SQL] Support LCA in grouping expressions
    
    ### What changes were proposed in this pull request?
    This PR adds support for lateral column alias (LCA) references in grouping expressions.
    
    ### Why are the changes needed?
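    To extend the LCA feature: referencing a lateral column alias via GROUP BY alias/ALL previously failed with UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY.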
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Existing tests (the column-resolution-aggregate.sql.out golden files were updated).
    
    Closes #41804 from Hisoka-X/SPARK-43851_LCA_in_group.
    
    Authored-by: Jia Fan <fanjiaemi...@qq.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../src/main/resources/error/error-classes.json    |  5 -----
 ...r-conditions-unsupported-feature-error-class.md |  4 ----
 .../analysis/ResolveReferencesInAggregate.scala    | 22 ++++++++----------
 .../column-resolution-aggregate.sql.out            | 26 +++++++++++++---------
 .../results/column-resolution-aggregate.sql.out    | 16 ++++---------
 5 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json 
b/common/utils/src/main/resources/error/error-classes.json
index 3cc35d668e0..eabd5533e13 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -2530,11 +2530,6 @@
           "Referencing lateral column alias <lca> in the aggregate query both 
with window expressions and with having clause. Please rewrite the aggregate 
query by removing the having clause or removing lateral alias reference in the 
SELECT list."
         ]
       },
-      "LATERAL_COLUMN_ALIAS_IN_GROUP_BY" : {
-        "message" : [
-          "Referencing a lateral column alias via GROUP BY alias/ALL is not 
supported yet."
-        ]
-      },
       "LATERAL_COLUMN_ALIAS_IN_WINDOW" : {
         "message" : [
           "Referencing a lateral column alias <lca> in window expression 
<windowExpr>."
diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md 
b/docs/sql-error-conditions-unsupported-feature-error-class.md
index 64d7eb347e5..78bf301c49d 100644
--- a/docs/sql-error-conditions-unsupported-feature-error-class.md
+++ b/docs/sql-error-conditions-unsupported-feature-error-class.md
@@ -65,10 +65,6 @@ Referencing a lateral column alias `<lca>` in the aggregate 
function `<aggFunc>`
 
 Referencing lateral column alias `<lca>` in the aggregate query both with 
window expressions and with having clause. Please rewrite the aggregate query 
by removing the having clause or removing lateral alias reference in the SELECT 
list.
 
-## LATERAL_COLUMN_ALIAS_IN_GROUP_BY
-
-Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet.
-
 ## LATERAL_COLUMN_ALIAS_IN_WINDOW
 
 Referencing a lateral column alias `<lca>` in window expression `<windowExpr>`.
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
index 09ae87b071f..41bcb337c67 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
@@ -17,9 +17,8 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
-import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute, 
Expression, NamedExpression}
+import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute, 
Expression, LateralColumnAliasReference, NamedExpression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, AppendColumns, 
LogicalPlan}
 import 
org.apache.spark.sql.catalyst.trees.TreePattern.{LATERAL_COLUMN_ALIAS_REFERENCE,
 UNRESOLVED_ATTRIBUTE}
@@ -74,12 +73,6 @@ object ResolveReferencesInAggregate extends SQLConfHelper
         resolvedAggExprsWithOuter,
         resolveGroupByAlias(resolvedAggExprsWithOuter, 
resolvedGroupExprsNoOuter)
       ).map(resolveOuterRef)
-      // TODO: currently we don't support LCA in `groupingExpressions` yet.
-      if (resolved.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE))) {
-        throw new AnalysisException(
-          errorClass = "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
-          messageParameters = Map.empty)
-      }
       resolved
     } else {
       // Do not resolve columns in grouping expressions to outer references 
here, as the aggregate
@@ -112,9 +105,11 @@ object ResolveReferencesInAggregate extends SQLConfHelper
     assert(selectList.forall(_.resolved))
     if (conf.groupByAliases) {
       groupExprs.map { g =>
-        g.transformWithPruning(_.containsPattern(UNRESOLVED_ATTRIBUTE)) {
-          case u: UnresolvedAttribute =>
-            selectList.find(ne => conf.resolver(ne.name, u.name)).getOrElse(u)
+        g.transformWithPruning(_.containsAnyPattern(UNRESOLVED_ATTRIBUTE,
+          LATERAL_COLUMN_ALIAS_REFERENCE)) {
+          case u @ (_: UnresolvedAttribute | _: LateralColumnAliasReference) =>
+            selectList.find(ne => conf.resolver(ne.name, 
u.asInstanceOf[NamedExpression].name))
+              .getOrElse(u)
         }
       }
     } else {
@@ -133,8 +128,9 @@ object ResolveReferencesInAggregate extends SQLConfHelper
         // tell the user in checkAnalysis that we cannot resolve the all in 
group by.
         groupExprs
       } else {
-        // This is a valid GROUP BY ALL aggregate.
-        expandedGroupExprs.get
+        // This is a valid GROUP BY ALL aggregate, resolve group by alias 
again to transform the
+        // LCA reference
+        resolveGroupByAlias(selectList, expandedGroupExprs.get)
       }
     } else {
       groupExprs
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
index eb30443cbae..3dab6c386cb 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
@@ -94,21 +94,27 @@ org.apache.spark.sql.AnalysisException
 -- !query
 SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col
 -- !query analysis
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
-  "sqlState" : "0A000"
-}
+Project [lca#x, (lca#x + 1) AS col#x]
++- Project [k#x, k#x AS lca#x]
+   +- Aggregate [k#x, (k#x + 1)], [k#x]
+      +- SubqueryAlias v1
+         +- View (`v1`, [a#x,b#x,k#x])
+            +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, 
cast(k#x as int) AS k#x]
+               +- SubqueryAlias t
+                  +- LocalRelation [a#x, b#x, k#x]
 
 
 -- !query
 SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all
 -- !query analysis
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
-  "sqlState" : "0A000"
-}
+Project [lca#x, (lca#x + 1) AS col#x]
++- Project [k#x, k#x AS lca#x]
+   +- Aggregate [k#x, (k#x + 1)], [k#x]
+      +- SubqueryAlias v1
+         +- View (`v1`, [a#x,b#x,k#x])
+            +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, 
cast(k#x as int) AS k#x]
+               +- SubqueryAlias t
+                  +- LocalRelation [a#x, b#x, k#x]
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
index e8ab766751c..e0bbcae91f1 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
@@ -91,25 +91,17 @@ org.apache.spark.sql.AnalysisException
 -- !query
 SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col
 -- !query schema
-struct<>
+struct<lca:int,col:int>
 -- !query output
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
-  "sqlState" : "0A000"
-}
+1      2
 
 
 -- !query
 SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all
 -- !query schema
-struct<>
+struct<lca:int,col:int>
 -- !query output
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
-  "sqlState" : "0A000"
-}
+1      2
 
 
 -- !query


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to