cloud-fan commented on code in PR #54859:
URL: https://github.com/apache/spark/pull/54859#discussion_r3132073342
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/NormalizePlanSuite.scala:
##########
@@ -117,6 +119,176 @@ class NormalizePlanSuite extends SparkFunSuite with
SQLConfHelper {
assert(NormalizePlan(baselinePlan) == NormalizePlan(testPlan))
}
+ test("Normalize ordering in a project list of an inner Project under Project
and Filter") {
+ val baselinePlan =
+ LocalRelation($"col1".int, $"col2".string)
+ .select($"col1", $"col2")
+ .where($"col1" === 1)
+ .select($"col1")
+ val testPlan =
+ LocalRelation($"col1".int, $"col2".string)
+ .select($"col2", $"col1")
+ .where($"col1" === 1)
+ .select($"col1")
+
+ assert(baselinePlan != testPlan)
+ assert(NormalizePlan(baselinePlan) == NormalizePlan(testPlan))
+ }
+
+ test("SubqueryAlias resets normalizeProjectList flag for inner Project") {
+ // Project under SubqueryAlias should NOT be normalized even when above a
Project.
+ // The SubqueryAlias boundary preserves the schema, so column order
matters.
+ val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+ .select($"col1", $"col2")
+ .subquery("t")
+ .select($"col1")
+ val testPlan = LocalRelation($"col1".int, $"col2".string)
+ .select($"col2", $"col1")
+ .subquery("t")
+ .select($"col1")
+
+ // The inner Project has different column order, and SubqueryAlias resets
the flag,
+ // so normalization should NOT make them equal.
+ assert(NormalizePlan(baselinePlan) != NormalizePlan(testPlan))
+ }
+
+ test("SubqueryAlias resets normalizeProjectList flag for inner Aggregate") {
+ // Aggregate under SubqueryAlias should NOT have its list normalized.
+ val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+ .groupBy($"col1", $"col2")($"col1", $"col2")
+ .subquery("t")
+ .select($"col1")
+ val testPlan = LocalRelation($"col1".int, $"col2".string)
+ .groupBy($"col1", $"col2")($"col2", $"col1")
+ .subquery("t")
+ .select($"col1")
+
+ assert(NormalizePlan(baselinePlan) != NormalizePlan(testPlan))
+ }
+
+ test("Nested SubqueryAlias resets flag even under multiple Projects") {
+ // Project -> Project -> SubqueryAlias -> Project: the innermost Project
should NOT
+ // be normalized because SubqueryAlias resets the flag.
+ val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+ .select($"col1", $"col2")
+ .subquery("t")
+ .select($"col1", $"col2")
+ .select($"col1")
+ val testPlan = LocalRelation($"col1".int, $"col2".string)
+ .select($"col2", $"col1")
+ .subquery("t")
+ .select($"col1", $"col2")
+ .select($"col1")
+
+ // The inner Project (below SubqueryAlias) differs in order and should NOT
be normalized.
+ assert(NormalizePlan(baselinePlan) != NormalizePlan(testPlan))
+ }
+
+ test("Project above SubqueryAlias IS normalized when under another Project")
{
+ // Project -> Project -> SubqueryAlias -> relation
+ // The middle Project (between outer Project and SubqueryAlias) should
still be normalized
+ // because it's under a Project, and SubqueryAlias only resets the flag
for its children.
+ val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+ .subquery("t")
+ .select($"col1", $"col2")
+ .select($"col1")
+ val testPlan = LocalRelation($"col1".int, $"col2".string)
+ .subquery("t")
+ .select($"col2", $"col1")
+ .select($"col1")
+
+ assert(baselinePlan != testPlan)
+ assert(NormalizePlan(baselinePlan) == NormalizePlan(testPlan))
+ }
+
+ test("SubqueryAlias resets flag but Project above SubqueryAlias still
normalizes with Filter") {
Review Comment:
Test name says "Project above SubqueryAlias still normalizes", but the plan
uses `.groupBy(…)` (an Aggregate) above the SubqueryAlias — the next-line
comment `// Aggregate -> Filter -> SubqueryAlias -> Project` confirms. Consider
renaming (or switching the top to a real `Project` if that was the intent):
```suggestion
test("SubqueryAlias resets flag but Aggregate above SubqueryAlias still
normalizes with Filter") {
```
Also: the assertion only checks that normalization does **not** collapse the
two plans — it never verifies that the Aggregate above is actually normalized.
If the intent was to cover "the structure above SubqueryAlias still
normalizes", consider adding an equality assertion against a baseline where
only the inner Project order differs.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]