yaooqinn commented on code in PR #55787:
URL: https://github.com/apache/spark/pull/55787#discussion_r3233282946
##########
sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala:
##########
@@ -215,6 +216,116 @@ class SqlResourceWithActualMetricsSuite
}
}
+ test("SPARK-56811: sqlTable groups sub-executions under their root
execution") {
+ // CACHE TABLE produces a root execution plus an inner sub-execution that
+ // shares its rootExecutionId. This is the canonical case where the SQL
+ // listing should fold the sub row under the root rather than flattening
it.
+ spark.sql("CREATE OR REPLACE TEMP VIEW spark_56811 AS SELECT id FROM
RANGE(10)")
+ .collect()
+ spark.sql("CACHE TABLE spark_56811_cached AS SELECT * FROM
spark_56811").collect()
+ try {
+ eventually(timeout(10.seconds), interval(1.second)) {
+ val baseUrl = spark.sparkContext.ui.get.webUrl +
+
s"/api/v1/applications/${spark.sparkContext.applicationId}/sql/sqlTable"
+
+ // Grouping ON: roots only, with subExecutions embedded on the root
that
+ // owns a sub-execution.
+ val groupedUrl = new URI(
+ s"$baseUrl?start=0&length=100&draw=1&groupSubExecution=true").toURL
+ val (groupedCode, groupedOpt, _) = getContentAndCode(groupedUrl)
+ assert(groupedCode === HttpServletResponse.SC_OK)
+ val groupedJson = JsonMethods.parse(groupedOpt.get)
+ val groupedRecordsTotal = (groupedJson \ "recordsTotal").extract[Long]
+ val groupedRecordsFiltered = (groupedJson \
"recordsFiltered").extract[Long]
+ val groupedRows = (groupedJson \ "aaData").children
+ assert(groupedRecordsTotal === groupedRows.size,
+ "with no filter, recordsTotal should match returned root count")
+ assert(groupedRecordsFiltered === groupedRows.size,
+ "with no filter, recordsFiltered should match returned root count")
+ // Every row in grouped mode is either a true root (id ==
rootExecutionId)
+ // or an orphan sub whose real parent is absent from the result set.
+ val visibleIds = groupedRows.map(r => (r \ "id").extract[Long]).toSet
+ groupedRows.foreach { row =>
+ val id = (row \ "id").extract[Long]
+ val rootId = (row \ "rootExecutionId").extract[Long]
+ assert(id == rootId || !visibleIds.contains(rootId),
+ s"grouped row $id (rootId=$rootId) is neither a root nor an
orphan")
+ }
+ val rootsWithSubs = groupedRows.filter { row =>
+ (row \ "subExecutions").children.nonEmpty
+ }
+ assert(rootsWithSubs.nonEmpty,
+ "CACHE TABLE should produce at least one root with sub-executions")
+ rootsWithSubs.foreach { row =>
+ val rootId = (row \ "id").extract[Long]
+ (row \ "subExecutions").children.foreach { sub =>
+ assert((sub \ "rootExecutionId").extract[Long] === rootId,
+ "sub-execution should reference its parent root")
+ assert((sub \ "id").extract[Long] !== rootId,
+ "sub-execution must not have the same id as its root")
+ }
+ }
+
+ // Grouping OFF: flat list of every execution, with no embedded subs.
+ val flatUrl = new URI(
+ s"$baseUrl?start=0&length=100&draw=2&groupSubExecution=false").toURL
+ val (flatCode, flatOpt, _) = getContentAndCode(flatUrl)
+ assert(flatCode === HttpServletResponse.SC_OK)
+ val flatJson = JsonMethods.parse(flatOpt.get)
+ val flatRows = (flatJson \ "aaData").children
+ assert(flatRows.size > groupedRows.size,
+ "flat listing should contain at least one extra sub-execution row")
+ flatRows.foreach { row =>
+ assert((row \ "subExecutions").children.isEmpty,
+ "flat listing should not embed subExecutions")
+ }
+ }
+ } finally {
+ spark.sql("UNCACHE TABLE IF EXISTS spark_56811_cached")
+ }
+ }
+
+ test("SPARK-56811: partitionRoots surfaces orphan sub-executions as root
rows") {
Review Comment:
The orphan branch is already directly exercised by the new `SPARK-56811:
partitionRoots surfaces orphan sub-executions as root rows` test (L288), which
constructs the exact `{root, sub, orphan}` mix and asserts on the partitioning.
The HTTP path is one line — `if (groupSubExec) partitionRoots(filteredExecs)` —
so an e2e test would mostly retest the JAX-RS/Jersey plumbing.
The reason I didn't add an HTTP-level case is that crafting one
deterministically requires us to know which substring is in the sub's
description but not the root's, and the description of a CACHE TABLE sub
depends on Spark internals (`CacheTableExec` materialization plan text), which
has drifted across versions. I'd rather not chain this PR to that. Happy to
file a follow-up if there's interest in a way to inject a synthetic
sub-execution description from a test fixture.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]