Repository: spark Updated Branches: refs/heads/branch-2.0 a5bec5b81 -> 57dd4efcd
[SPARK-15792][SQL] Allows operator to change the verbosity in explain output ## What changes were proposed in this pull request? This PR allows customization of verbosity in explain output. After change, `dataframe.explain()` and `dataframe.explain(true)` has different verbosity output for physical plan. Currently, this PR only enables verbosity string for operator `HashAggregateExec` and `SortAggregateExec`. We will gradually enable verbosity string for more operators in future. **Less verbose mode:** dataframe.explain(extended = false) `output=[count(a)#85L]` is **NOT** displayed for HashAggregate. ``` scala> Seq((1,2,3)).toDF("a", "b", "c").createTempView("df2") scala> spark.sql("select count(a) from df2").explain() == Physical Plan == *HashAggregate(key=[], functions=[count(1)]) +- Exchange SinglePartition +- *HashAggregate(key=[], functions=[partial_count(1)]) +- LocalTableScan ``` **Verbose mode:** dataframe.explain(extended = true) `output=[count(a)#85L]` is displayed for HashAggregate. ``` scala> spark.sql("select count(a) from df2").explain(true) // "output=[count(a)#85L]" is added ... == Physical Plan == *HashAggregate(key=[], functions=[count(1)], output=[count(a)#85L]) +- Exchange SinglePartition +- *HashAggregate(key=[], functions=[partial_count(1)], output=[count#87L]) +- LocalTableScan ``` ## How was this patch tested? Manual test. Author: Sean Zhong <seanzh...@databricks.com> Closes #13535 from clockfly/verbose_breakdown_2. (cherry picked from commit 5f731d6859c4516941e5f90c99c966ef76268864) Signed-off-by: Cheng Lian <l...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57dd4efc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57dd4efc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57dd4efc Branch: refs/heads/branch-2.0 Commit: 57dd4efcda9158646df41ea8d70754dc110ecd6f Parents: a5bec5b Author: Sean Zhong <seanzh...@databricks.com> Authored: Mon Jun 6 22:59:25 2016 -0700 Committer: Cheng Lian <l...@databricks.com> Committed: Mon Jun 6 22:59:34 2016 -0700 ---------------------------------------------------------------------- .../sql/catalyst/expressions/Expression.scala | 4 ++++ .../spark/sql/catalyst/plans/QueryPlan.scala | 2 ++ .../spark/sql/catalyst/trees/TreeNode.scala | 23 +++++++++++++++----- .../spark/sql/execution/QueryExecution.scala | 14 +++++++----- .../sql/execution/WholeStageCodegenExec.scala | 6 +++-- .../execution/aggregate/HashAggregateExec.scala | 12 ++++++++-- .../execution/aggregate/SortAggregateExec.scala | 12 ++++++++-- 7 files changed, 55 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 2ec4621..efe592d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -190,6 +190,10 @@ abstract class Expression extends TreeNode[Expression] { case single => single :: Nil } + // Marks this as final, Expression.verboseString should never be called, and thus shouldn't be + // overridden by concrete classes. + final override def verboseString: String = simpleString + override def simpleString: String = toString override def toString: String = prettyName + flatArguments.mkString("(", ", ", ")") http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 19a66cf..cf34f4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -257,6 +257,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT override def simpleString: String = statePrefix + super.simpleString + override def verboseString: String = simpleString + /** * All the subqueries of current plan. */ http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 22d82c6..c67366d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -462,10 +462,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { /** ONE line description of this node. */ def simpleString: String = s"$nodeName $argString".trim + /** ONE line description of this node with more information */ + def verboseString: String + override def toString: String = treeString /** Returns a string representation of the nodes in this tree */ - def treeString: String = generateTreeString(0, Nil, new StringBuilder).toString + def treeString: String = treeString(verbose = true) + + def treeString(verbose: Boolean): String = { + generateTreeString(0, Nil, new StringBuilder, verbose).toString + } /** * Returns a string representation of the nodes in this tree, where each operator is numbered. @@ -508,6 +515,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { depth: Int, lastChildren: Seq[Boolean], builder: StringBuilder, + verbose: Boolean, prefix: String = ""): StringBuilder = { if (depth > 0) { lastChildren.init.foreach { isLast => @@ -520,18 +528,21 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { } builder.append(prefix) - builder.append(simpleString) + val headline = if (verbose) verboseString else simpleString + builder.append(headline) builder.append("\n") if (innerChildren.nonEmpty) { innerChildren.init.foreach(_.generateTreeString( - depth + 2, lastChildren :+ false :+ false, builder)) - innerChildren.last.generateTreeString(depth + 2, lastChildren :+ false :+ true, builder) + depth + 2, lastChildren :+ false :+ false, builder, verbose)) + innerChildren.last.generateTreeString( + depth + 2, lastChildren :+ false :+ true, builder, verbose) } if (children.nonEmpty) { - children.init.foreach(_.generateTreeString(depth + 1, lastChildren :+ false, builder, prefix)) - children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, prefix) + children.init.foreach( + _.generateTreeString(depth + 1, lastChildren :+ false, builder, verbose, prefix)) + children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, verbose, prefix) } builder http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 330459c..560214a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -201,24 +201,26 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { def simpleString: String = { s"""== Physical Plan == - |${stringOrError(executedPlan)} + |${stringOrError(executedPlan.treeString(verbose = false))} """.stripMargin.trim } override def toString: String = { def output = analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ") - val analyzedPlan = - Seq(stringOrError(output), stringOrError(analyzed)).filter(_.nonEmpty).mkString("\n") + val analyzedPlan = Seq( + stringOrError(output), + stringOrError(analyzed.treeString(verbose = true)) + ).filter(_.nonEmpty).mkString("\n") s"""== Parsed Logical Plan == - |${stringOrError(logical)} + |${stringOrError(logical.treeString(verbose = true))} |== Analyzed Logical Plan == |$analyzedPlan |== Optimized Logical Plan == - |${stringOrError(optimizedPlan)} + |${stringOrError(optimizedPlan.treeString(verbose = true))} |== Physical Plan == - |${stringOrError(executedPlan)} + |${stringOrError(executedPlan.treeString(verbose = true))} """.stripMargin.trim } http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index d3e8d4e..e0d8e35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -250,8 +250,9 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with CodegenSupp depth: Int, lastChildren: Seq[Boolean], builder: StringBuilder, + verbose: Boolean, prefix: String = ""): StringBuilder = { - child.generateTreeString(depth, lastChildren, builder, "") + child.generateTreeString(depth, lastChildren, builder, verbose, "") } } @@ -407,8 +408,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co depth: Int, lastChildren: Seq[Boolean], builder: StringBuilder, + verbose: Boolean, prefix: String = ""): StringBuilder = { - child.generateTreeString(depth, lastChildren, builder, "*") + child.generateTreeString(depth, lastChildren, builder, verbose, "*") } } http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index f270ca0..b617e26 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -764,7 +764,11 @@ case class HashAggregateExec( """ } - override def simpleString: String = { + override def verboseString: String = toString(verbose = true) + + override def simpleString: String = toString(verbose = false) + + private def toString(verbose: Boolean): String = { val allAggregateExpressions = aggregateExpressions testFallbackStartsAt match { @@ -772,7 +776,11 @@ case class HashAggregateExec( val keyString = groupingExpressions.mkString("[", ",", "]") val functionString = allAggregateExpressions.mkString("[", ",", "]") val outputString = output.mkString("[", ",", "]") - s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)" + if (verbose) { + s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)" + } else { + s"HashAggregate(key=$keyString, functions=$functionString)" + } case Some(fallbackStartsAt) => s"HashAggregateWithControlledFallback $groupingExpressions " + s"$allAggregateExpressions $resultExpressions fallbackStartsAt=$fallbackStartsAt" http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala index 9e48ff8..41ba9f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala @@ -103,12 +103,20 @@ case class SortAggregateExec( } } - override def simpleString: String = { + override def simpleString: String = toString(verbose = false) + + override def verboseString: String = toString(verbose = true) + + private def toString(verbose: Boolean): String = { val allAggregateExpressions = aggregateExpressions val keyString = groupingExpressions.mkString("[", ",", "]") val functionString = allAggregateExpressions.mkString("[", ",", "]") val outputString = output.mkString("[", ",", "]") - s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)" + if (verbose) { + s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)" + } else { + s"SortAggregate(key=$keyString, functions=$functionString)" + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org