cloud-fan commented on code in PR #54859:
URL: https://github.com/apache/spark/pull/54859#discussion_r3132073342


##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/NormalizePlanSuite.scala:
##########
@@ -117,6 +119,176 @@ class NormalizePlanSuite extends SparkFunSuite with 
SQLConfHelper {
     assert(NormalizePlan(baselinePlan) == NormalizePlan(testPlan))
   }
 
+  test("Normalize ordering in a project list of an inner Project under Project 
and Filter") {
+    val baselinePlan =
+      LocalRelation($"col1".int, $"col2".string)
+        .select($"col1", $"col2")
+        .where($"col1" === 1)
+        .select($"col1")
+    val testPlan =
+      LocalRelation($"col1".int, $"col2".string)
+        .select($"col2", $"col1")
+        .where($"col1" === 1)
+        .select($"col1")
+
+    assert(baselinePlan != testPlan)
+    assert(NormalizePlan(baselinePlan) == NormalizePlan(testPlan))
+  }
+
+  test("SubqueryAlias resets normalizeProjectList flag for inner Project") {
+    // Project under SubqueryAlias should NOT be normalized even when above a 
Project.
+    // The SubqueryAlias boundary preserves the schema, so column order 
matters.
+    val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+      .select($"col1", $"col2")
+      .subquery("t")
+      .select($"col1")
+    val testPlan = LocalRelation($"col1".int, $"col2".string)
+      .select($"col2", $"col1")
+      .subquery("t")
+      .select($"col1")
+
+    // The inner Project has different column order, and SubqueryAlias resets 
the flag,
+    // so normalization should NOT make them equal.
+    assert(NormalizePlan(baselinePlan) != NormalizePlan(testPlan))
+  }
+
+  test("SubqueryAlias resets normalizeProjectList flag for inner Aggregate") {
+    // Aggregate under SubqueryAlias should NOT have its list normalized.
+    val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+      .groupBy($"col1", $"col2")($"col1", $"col2")
+      .subquery("t")
+      .select($"col1")
+    val testPlan = LocalRelation($"col1".int, $"col2".string)
+      .groupBy($"col1", $"col2")($"col2", $"col1")
+      .subquery("t")
+      .select($"col1")
+
+    assert(NormalizePlan(baselinePlan) != NormalizePlan(testPlan))
+  }
+
+  test("Nested SubqueryAlias resets flag even under multiple Projects") {
+    // Project -> Project -> SubqueryAlias -> Project: the innermost Project 
should NOT
+    // be normalized because SubqueryAlias resets the flag.
+    val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+      .select($"col1", $"col2")
+      .subquery("t")
+      .select($"col1", $"col2")
+      .select($"col1")
+    val testPlan = LocalRelation($"col1".int, $"col2".string)
+      .select($"col2", $"col1")
+      .subquery("t")
+      .select($"col1", $"col2")
+      .select($"col1")
+
+    // The inner Project (below SubqueryAlias) differs in order and should NOT 
be normalized.
+    assert(NormalizePlan(baselinePlan) != NormalizePlan(testPlan))
+  }
+
+  test("Project above SubqueryAlias IS normalized when under another Project") 
{
+    // Project -> Project -> SubqueryAlias -> relation
+    // The middle Project (between outer Project and SubqueryAlias) should 
still be normalized
+    // because it's under a Project, and SubqueryAlias only resets the flag 
for its children.
+    val baselinePlan = LocalRelation($"col1".int, $"col2".string)
+      .subquery("t")
+      .select($"col1", $"col2")
+      .select($"col1")
+    val testPlan = LocalRelation($"col1".int, $"col2".string)
+      .subquery("t")
+      .select($"col2", $"col1")
+      .select($"col1")
+
+    assert(baselinePlan != testPlan)
+    assert(NormalizePlan(baselinePlan) == NormalizePlan(testPlan))
+  }
+
+  test("SubqueryAlias resets flag but Project above SubqueryAlias still 
normalizes with Filter") {

Review Comment:
   Test name says "Project above SubqueryAlias still normalizes", but the plan 
uses `.groupBy(…)` (an Aggregate) above the SubqueryAlias — the next-line 
comment `// Aggregate -> Filter -> SubqueryAlias -> Project` confirms. Consider 
renaming (or switching the top to a real `Project` if that was the intent):
   
   ```suggestion
     test("SubqueryAlias resets flag but Aggregate above SubqueryAlias still 
normalizes with Filter") {
   ```
   
   Also: the assertion only checks that normalization does **not** collapse the 
two plans — it never verifies that the Aggregate above is actually normalized. 
If the intent was to cover "the structure above SubqueryAlias still 
normalizes", consider adding an equality assertion against a baseline where 
only the inner Project order differs.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to