yaooqinn commented on code in PR #55787:
URL: https://github.com/apache/spark/pull/55787#discussion_r3233282946


##########
sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala:
##########
@@ -215,6 +216,116 @@ class SqlResourceWithActualMetricsSuite
     }
   }
 
+  test("SPARK-56811: sqlTable groups sub-executions under their root 
execution") {
+    // CACHE TABLE produces a root execution plus an inner sub-execution that
+    // shares its rootExecutionId. This is the canonical case where the SQL
+    // listing should fold the sub row under the root rather than flattening 
it.
+    spark.sql("CREATE OR REPLACE TEMP VIEW spark_56811 AS SELECT id FROM 
RANGE(10)")
+      .collect()
+    spark.sql("CACHE TABLE spark_56811_cached AS SELECT * FROM 
spark_56811").collect()
+    try {
+      eventually(timeout(10.seconds), interval(1.second)) {
+        val baseUrl = spark.sparkContext.ui.get.webUrl +
+          
s"/api/v1/applications/${spark.sparkContext.applicationId}/sql/sqlTable"
+
+        // Grouping ON: roots only, with subExecutions embedded on the root 
that
+        // owns a sub-execution.
+        val groupedUrl = new URI(
+          s"$baseUrl?start=0&length=100&draw=1&groupSubExecution=true").toURL
+        val (groupedCode, groupedOpt, _) = getContentAndCode(groupedUrl)
+        assert(groupedCode === HttpServletResponse.SC_OK)
+        val groupedJson = JsonMethods.parse(groupedOpt.get)
+        val groupedRecordsTotal = (groupedJson \ "recordsTotal").extract[Long]
+        val groupedRecordsFiltered = (groupedJson \ 
"recordsFiltered").extract[Long]
+        val groupedRows = (groupedJson \ "aaData").children
+        assert(groupedRecordsTotal === groupedRows.size,
+          "with no filter, recordsTotal should match returned root count")
+        assert(groupedRecordsFiltered === groupedRows.size,
+          "with no filter, recordsFiltered should match returned root count")
+        // Every row in grouped mode is either a true root (id == 
rootExecutionId)
+        // or an orphan sub whose real parent is absent from the result set.
+        val visibleIds = groupedRows.map(r => (r \ "id").extract[Long]).toSet
+        groupedRows.foreach { row =>
+          val id = (row \ "id").extract[Long]
+          val rootId = (row \ "rootExecutionId").extract[Long]
+          assert(id == rootId || !visibleIds.contains(rootId),
+            s"grouped row $id (rootId=$rootId) is neither a root nor an 
orphan")
+        }
+        val rootsWithSubs = groupedRows.filter { row =>
+          (row \ "subExecutions").children.nonEmpty
+        }
+        assert(rootsWithSubs.nonEmpty,
+          "CACHE TABLE should produce at least one root with sub-executions")
+        rootsWithSubs.foreach { row =>
+          val rootId = (row \ "id").extract[Long]
+          (row \ "subExecutions").children.foreach { sub =>
+            assert((sub \ "rootExecutionId").extract[Long] === rootId,
+              "sub-execution should reference its parent root")
+            assert((sub \ "id").extract[Long] !== rootId,
+              "sub-execution must not have the same id as its root")
+          }
+        }
+
+        // Grouping OFF: flat list of every execution, with no embedded subs.
+        val flatUrl = new URI(
+          s"$baseUrl?start=0&length=100&draw=2&groupSubExecution=false").toURL
+        val (flatCode, flatOpt, _) = getContentAndCode(flatUrl)
+        assert(flatCode === HttpServletResponse.SC_OK)
+        val flatJson = JsonMethods.parse(flatOpt.get)
+        val flatRows = (flatJson \ "aaData").children
+        assert(flatRows.size > groupedRows.size,
+          "flat listing should contain at least one extra sub-execution row")
+        flatRows.foreach { row =>
+          assert((row \ "subExecutions").children.isEmpty,
+            "flat listing should not embed subExecutions")
+        }
+      }
+    } finally {
+      spark.sql("UNCACHE TABLE IF EXISTS spark_56811_cached")
+    }
+  }
+
+  test("SPARK-56811: partitionRoots surfaces orphan sub-executions as root 
rows") {

Review Comment:
   The orphan branch is already directly exercised by the new `SPARK-56811: 
partitionRoots surfaces orphan sub-executions as root rows` test (L288), which 
constructs the exact `{root, sub, orphan}` mix and asserts on the partitioning. 
The HTTP path is one line — `if (groupSubExec) partitionRoots(filteredExecs)` — 
so an e2e test would mostly retest the JAX-RS/Jersey plumbing.
   
   The reason I didn't add an HTTP-level case is that crafting one 
deterministically requires us to know which substring is in the sub's 
description but not the root's, and the description of a CACHE TABLE sub 
depends on Spark internals (`CacheTableExec` materialization plan text), which 
has drifted across versions. I'd rather not chain this PR to that. Happy to 
file a follow-up if there's interest in a way to inject a synthetic 
sub-execution description from a test fixture.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to