Repository: spark
Updated Branches:
  refs/heads/branch-2.0 a5bec5b81 -> 57dd4efcd


[SPARK-15792][SQL] Allows operator to change the verbosity in explain output

## What changes were proposed in this pull request?

This PR allows customization of the verbosity in explain output. After this change,
`dataframe.explain()` and `dataframe.explain(true)` produce physical-plan output
at different verbosity levels.

Currently, this PR only enables the verbose string for the operators
`HashAggregateExec` and `SortAggregateExec`. We will gradually enable the verbose
string for more operators in the future.
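
For reference, below is a minimal sketch of the operator-side pattern this patch establishes; the operator name and fields (`MyOperatorExec`, `projectList`) are hypothetical and not part of this change:

```
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, NamedExpression}
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}

// Hypothetical physical operator showing how a node can expose both a compact
// and a verbose one-line description, mirroring the HashAggregateExec change below.
case class MyOperatorExec(projectList: Seq[NamedExpression], child: SparkPlan)
  extends UnaryExecNode {

  override def output: Seq[Attribute] = projectList.map(_.toAttribute)

  // Rendered by dataframe.explain(): keep it compact.
  override def simpleString: String =
    s"MyOperator(project=${projectList.mkString("[", ",", "]")})"

  // Rendered by dataframe.explain(true): additionally include the output attributes.
  override def verboseString: String =
    s"MyOperator(project=${projectList.mkString("[", ",", "]")}, " +
      s"output=${output.mkString("[", ",", "]")})"

  // Pass-through execution; irrelevant to the explain output.
  override protected def doExecute(): RDD[InternalRow] = child.execute()
}
```

Expressions themselves are not affected: the patch marks `Expression.verboseString` as final and equal to `simpleString`, so only plan nodes vary with the verbosity flag.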

**Less verbose mode:** `dataframe.explain(extended = false)`

`output=[count(a)#85L]` is **NOT** displayed for HashAggregate.

```
scala> Seq((1,2,3)).toDF("a", "b", "c").createTempView("df2")
scala> spark.sql("select count(a) from df2").explain()
== Physical Plan ==
*HashAggregate(key=[], functions=[count(1)])
+- Exchange SinglePartition
   +- *HashAggregate(key=[], functions=[partial_count(1)])
      +- LocalTableScan
```

**Verbose mode:** `dataframe.explain(extended = true)`

`output=[count(a)#85L]` is displayed for HashAggregate.

```
scala> spark.sql("select count(a) from df2").explain(true)  // "output=[count(a)#85L]" is added
...
== Physical Plan ==
*HashAggregate(key=[], functions=[count(1)], output=[count(a)#85L])
+- Exchange SinglePartition
   +- *HashAggregate(key=[], functions=[partial_count(1)], output=[count#87L])
      +- LocalTableScan
```
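
For context, `dataframe.explain()` goes through `QueryExecution.simpleString`, which now renders the physical plan with `treeString(verbose = false)`, while `dataframe.explain(true)` goes through `QueryExecution.toString`, which renders all plans with `treeString(verbose = true)` (see the `QueryExecution` changes below). A small illustrative spark-shell snippet, assuming the `df2` temp view from above:

```
val df = spark.sql("select count(a) from df2")

df.explain()       // physical plan only, rendered with treeString(verbose = false)
df.explain(true)   // parsed/analyzed/optimized/physical plans, rendered with treeString(verbose = true)

// The same strings can be obtained directly from the query execution:
println(df.queryExecution.simpleString)  // non-verbose physical plan
println(df.queryExecution.toString)      // verbose output of all plans
```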

## How was this patch tested?

Manual test.

Author: Sean Zhong <seanzh...@databricks.com>

Closes #13535 from clockfly/verbose_breakdown_2.

(cherry picked from commit 5f731d6859c4516941e5f90c99c966ef76268864)
Signed-off-by: Cheng Lian <l...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57dd4efc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57dd4efc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57dd4efc

Branch: refs/heads/branch-2.0
Commit: 57dd4efcda9158646df41ea8d70754dc110ecd6f
Parents: a5bec5b
Author: Sean Zhong <seanzh...@databricks.com>
Authored: Mon Jun 6 22:59:25 2016 -0700
Committer: Cheng Lian <l...@databricks.com>
Committed: Mon Jun 6 22:59:34 2016 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/Expression.scala   |  4 ++++
 .../spark/sql/catalyst/plans/QueryPlan.scala    |  2 ++
 .../spark/sql/catalyst/trees/TreeNode.scala     | 23 +++++++++++++++-----
 .../spark/sql/execution/QueryExecution.scala    | 14 +++++++-----
 .../sql/execution/WholeStageCodegenExec.scala   |  6 +++--
 .../execution/aggregate/HashAggregateExec.scala | 12 ++++++++--
 .../execution/aggregate/SortAggregateExec.scala | 12 ++++++++--
 7 files changed, 55 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 2ec4621..efe592d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -190,6 +190,10 @@ abstract class Expression extends TreeNode[Expression] {
     case single => single :: Nil
   }
 
+  // Marks this as final, Expression.verboseString should never be called, and thus shouldn't be
+  // overridden by concrete classes.
+  final override def verboseString: String = simpleString
+
   override def simpleString: String = toString
 
   override def toString: String = prettyName + flatArguments.mkString("(", ", ", ")")

http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 19a66cf..cf34f4b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -257,6 +257,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
 
   override def simpleString: String = statePrefix + super.simpleString
 
+  override def verboseString: String = simpleString
+
   /**
    * All the subqueries of current plan.
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 22d82c6..c67366d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -462,10 +462,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
   /** ONE line description of this node. */
   def simpleString: String = s"$nodeName $argString".trim
 
+  /** ONE line description of this node with more information */
+  def verboseString: String
+
   override def toString: String = treeString
 
   /** Returns a string representation of the nodes in this tree */
-  def treeString: String = generateTreeString(0, Nil, new StringBuilder).toString
+  def treeString: String = treeString(verbose = true)
+
+  def treeString(verbose: Boolean): String = {
+    generateTreeString(0, Nil, new StringBuilder, verbose).toString
+  }
 
   /**
    * Returns a string representation of the nodes in this tree, where each operator is numbered.
@@ -508,6 +515,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
       depth: Int,
       lastChildren: Seq[Boolean],
       builder: StringBuilder,
+      verbose: Boolean,
       prefix: String = ""): StringBuilder = {
     if (depth > 0) {
       lastChildren.init.foreach { isLast =>
@@ -520,18 +528,21 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
     }
 
     builder.append(prefix)
-    builder.append(simpleString)
+    val headline = if (verbose) verboseString else simpleString
+    builder.append(headline)
     builder.append("\n")
 
     if (innerChildren.nonEmpty) {
       innerChildren.init.foreach(_.generateTreeString(
-        depth + 2, lastChildren :+ false :+ false, builder))
-      innerChildren.last.generateTreeString(depth + 2, lastChildren :+ false :+ true, builder)
+        depth + 2, lastChildren :+ false :+ false, builder, verbose))
+      innerChildren.last.generateTreeString(
+        depth + 2, lastChildren :+ false :+ true, builder, verbose)
     }
 
     if (children.nonEmpty) {
-      children.init.foreach(_.generateTreeString(depth + 1, lastChildren :+ false, builder, prefix))
-      children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, prefix)
+      children.init.foreach(
+        _.generateTreeString(depth + 1, lastChildren :+ false, builder, verbose, prefix))
+      children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, verbose, prefix)
     }
 
     builder

http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index 330459c..560214a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -201,24 +201,26 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
 
   def simpleString: String = {
     s"""== Physical Plan ==
-       |${stringOrError(executedPlan)}
+       |${stringOrError(executedPlan.treeString(verbose = false))}
       """.stripMargin.trim
   }
 
   override def toString: String = {
     def output =
      analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ")
-    val analyzedPlan =
-      Seq(stringOrError(output), stringOrError(analyzed)).filter(_.nonEmpty).mkString("\n")
+    val analyzedPlan = Seq(
+      stringOrError(output),
+      stringOrError(analyzed.treeString(verbose = true))
+    ).filter(_.nonEmpty).mkString("\n")
 
     s"""== Parsed Logical Plan ==
-       |${stringOrError(logical)}
+       |${stringOrError(logical.treeString(verbose = true))}
        |== Analyzed Logical Plan ==
        |$analyzedPlan
        |== Optimized Logical Plan ==
-       |${stringOrError(optimizedPlan)}
+       |${stringOrError(optimizedPlan.treeString(verbose = true))}
        |== Physical Plan ==
-       |${stringOrError(executedPlan)}
+       |${stringOrError(executedPlan.treeString(verbose = true))}
     """.stripMargin.trim
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index d3e8d4e..e0d8e35 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -250,8 +250,9 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with CodegenSupp
       depth: Int,
       lastChildren: Seq[Boolean],
       builder: StringBuilder,
+      verbose: Boolean,
       prefix: String = ""): StringBuilder = {
-    child.generateTreeString(depth, lastChildren, builder, "")
+    child.generateTreeString(depth, lastChildren, builder, verbose, "")
   }
 }
 
@@ -407,8 +408,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
       depth: Int,
       lastChildren: Seq[Boolean],
       builder: StringBuilder,
+      verbose: Boolean,
       prefix: String = ""): StringBuilder = {
-    child.generateTreeString(depth, lastChildren, builder, "*")
+    child.generateTreeString(depth, lastChildren, builder, verbose, "*")
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index f270ca0..b617e26 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -764,7 +764,11 @@ case class HashAggregateExec(
      """
   }
 
-  override def simpleString: String = {
+  override def verboseString: String = toString(verbose = true)
+
+  override def simpleString: String = toString(verbose = false)
+
+  private def toString(verbose: Boolean): String = {
     val allAggregateExpressions = aggregateExpressions
 
     testFallbackStartsAt match {
@@ -772,7 +776,11 @@ case class HashAggregateExec(
         val keyString = groupingExpressions.mkString("[", ",", "]")
         val functionString = allAggregateExpressions.mkString("[", ",", "]")
         val outputString = output.mkString("[", ",", "]")
-        s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+        if (verbose) {
+          s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+        } else {
+          s"HashAggregate(key=$keyString, functions=$functionString)"
+        }
       case Some(fallbackStartsAt) =>
         s"HashAggregateWithControlledFallback $groupingExpressions " +
           s"$allAggregateExpressions $resultExpressions fallbackStartsAt=$fallbackStartsAt"

http://git-wip-us.apache.org/repos/asf/spark/blob/57dd4efc/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
index 9e48ff8..41ba9f5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
@@ -103,12 +103,20 @@ case class SortAggregateExec(
     }
   }
 
-  override def simpleString: String = {
+  override def simpleString: String = toString(verbose = false)
+
+  override def verboseString: String = toString(verbose = true)
+
+  private def toString(verbose: Boolean): String = {
     val allAggregateExpressions = aggregateExpressions
 
     val keyString = groupingExpressions.mkString("[", ",", "]")
     val functionString = allAggregateExpressions.mkString("[", ",", "]")
     val outputString = output.mkString("[", ",", "]")
-    s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+    if (verbose) {
+      s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+    } else {
+      s"SortAggregate(key=$keyString, functions=$functionString)"
+    }
   }
 }

