This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new d210b5e  [SPARK-34776][SQL] Nested column pruning should not prune Window produced attributes
d210b5e is described below

commit d210b5ed6e881ddbb1ee5f523dc4711f7fcc0762
Author: Liang-Chi Hsieh <vii...@gmail.com>
AuthorDate: Fri Mar 19 11:44:02 2021 -0700

    [SPARK-34776][SQL] Nested column pruning should not prune Window produced attributes

    ### What changes were proposed in this pull request?

    This patch proposes to fix a bug related to `NestedColumnAliasing`. The root cause is `Window` doesn't override `producedAttributes`, so the `NestedColumnAliasing` rule wrongly prunes attributes produced by `Window`. The master and branch-3.1 branches both have this issue.

    ### Why are the changes needed?

    It is needed to fix a bug of nested column pruning.

    ### Does this PR introduce _any_ user-facing change?

    No

    ### How was this patch tested?

    Unit test.

    Closes #31897 from viirya/SPARK-34776.

    Authored-by: Liang-Chi Hsieh <vii...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 7a8a600995ddee32f0a9c81a97be3fc2bca21928)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../plans/logical/basicLogicalOperators.scala         |  2 ++
 .../scala/org/apache/spark/sql/DataFrameSuite.scala   | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index ab24aa4..338c1db 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -639,6 +639,8 @@ case class Window(
   override def output: Seq[Attribute] =
     child.output ++ windowExpressions.map(_.toAttribute)
 
+  override def producedAttributes: AttributeSet = windowOutputSet
+
   def windowOutputSet: AttributeSet = AttributeSet(windowExpressions.map(_.toAttribute))
 }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 8d95f83..594b142 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.execution.{FilterExec, QueryExecution, WholeStageCod
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
 import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec}
+import org.apache.spark.sql.expressions.Window
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession}
@@ -2584,6 +2585,24 @@ class DataFrameSuite extends QueryTest
     val col2 = df.colRegex("test\n_table.`tes.*\n.*mn`")
     checkAnswer(df.select(col2), Row(1) :: Row(2) :: Row(3) :: Nil)
   }
+
+  test("SPARK-34776: Nested column pruning should not prune Window produced attributes") {
+    val df = Seq(
+      ("t1", "123", "bob"),
+      ("t1", "456", "bob"),
+      ("t2", "123", "sam")
+    ).toDF("type", "value", "name")
+
+    val test = df.select(
+      $"*",
+      struct(count($"*").over(Window.partitionBy($"type", $"value", $"name"))
+        .as("count"), $"name").as("name_count")
+    ).select(
+      $"*",
+      max($"name_count").over(Window.partitionBy($"type", $"value")).as("best_name")
+    )
+    checkAnswer(test.select($"best_name.name"), Row("bob") :: Row("bob") :: Row("sam") :: Nil)
+  }
 }
 
 case class GroupByKey(a: Int, b: Int)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org