spark git commit: [SPARK-17751][SQL][BACKPORT-2.0] Remove spark.sql.eagerAnalysis and Output the Plan if Existed in AnalysisException

rxin Tue, 18 Oct 2016 10:59:07 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 9e806f2a4 -> 2aa25833c



[SPARK-17751][SQL][BACKPORT-2.0] Remove spark.sql.eagerAnalysis and Output the 
Plan if Existed in AnalysisException

### What changes were proposed in this pull request?
This PR is to backport the fix https://github.com/apache/spark/pull/15316 to 
2.0.

Dataset always does eager analysis now. Thus, `spark.sql.eagerAnalysis` is not 
used any more. Thus, we need to remove it.

This PR also outputs the plan. Without the fix, the analysis error is like
```
cannot resolve '`k1`' given input columns: [k, v]; line 1 pos 12
```

After the fix, the analysis error becomes:
```
org.apache.spark.sql.AnalysisException: cannot resolve '`k1`' given input 
columns: [k, v]; line 1 pos 12;
'Project [unresolvedalias(CASE WHEN ('k1 = 2) THEN 22 WHEN ('k1 = 4) THEN 44 
ELSE 0 END, None), v#6]
+- SubqueryAlias t
   +- Project [_1#2 AS k#5, _2#3 AS v#6]
      +- LocalRelation [_1#2, _2#3]
```

### How was this patch tested?
N/A

Author: gatorsmile <gatorsm...@gmail.com>

Closes #15529 from gatorsmile/eagerAnalysis20.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2aa25833
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2aa25833
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2aa25833

Branch: refs/heads/branch-2.0
Commit: 2aa25833c6f40af13a03a813b5f75d515f689577
Parents: 9e806f2
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Tue Oct 18 10:58:19 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Tue Oct 18 10:58:19 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/AnalysisException.scala    |  7 +++++++
 .../org/apache/spark/sql/execution/debug/package.scala    |  9 ---------
 .../scala/org/apache/spark/sql/internal/SQLConf.scala     | 10 ----------
 .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala    |  3 +++
 4 files changed, 10 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
index 6911843..d3ee8f7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -43,6 +43,13 @@ class AnalysisException protected[sql] (
   }
 
   override def getMessage: String = {
+    val planAnnotation = plan.map(p => s";\n$p").getOrElse("")
+    getSimpleMessage + planAnnotation
+  }
+
+  // Outputs an exception without the logical plan.
+  // For testing only
+  def getSimpleMessage: String = {
     val lineAnnotation = line.map(l => s" line $l").getOrElse("")
     val positionAnnotation = startPosition.map(p => s" pos $p").getOrElse("")
     s"$message;$lineAnnotation$positionAnnotation"

http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index d321f4c..dd9d837 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -70,15 +70,6 @@ package object debug {
   }
 
   /**
-   * Augments [[SparkSession]] with debug methods.
-   */
-  implicit class DebugSQLContext(sparkSession: SparkSession) {
-    def debug(): Unit = {
-      sparkSession.conf.set(SQLConf.DATAFRAME_EAGER_ANALYSIS.key, false)
-    }
-  }
-
-  /**
    * Augments [[Dataset]]s with debug methods.
    */
   implicit class DebugQuery(query: Dataset[_]) extends Logging {

http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 452eeed..8ba87c8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -396,14 +396,6 @@ object SQLConf {
       .intConf
       .createWithDefault(32)
 
-  // Whether to perform eager analysis when constructing a dataframe.
-  // Set to false when debugging requires the ability to look at invalid query 
plans.
-  val DATAFRAME_EAGER_ANALYSIS = SQLConfigBuilder("spark.sql.eagerAnalysis")
-    .internal()
-    .doc("When true, eagerly applies query analysis on DataFrame operations.")
-    .booleanConf
-    .createWithDefault(true)
-
   // Whether to automatically resolve ambiguity in join conditions for 
self-joins.
   // See SPARK-6231.
   val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY =
@@ -713,8 +705,6 @@ private[sql] class SQLConf extends Serializable with 
CatalystConf with Logging {
   // See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more 
information.
   def schemaStringLengthThreshold: Int = 
getConf(SCHEMA_STRING_LENGTH_THRESHOLD)
 
-  def dataFrameEagerAnalysis: Boolean = getConf(DATAFRAME_EAGER_ANALYSIS)
-
   def dataFrameSelfJoinAutoResolveAmbiguity: Boolean =
     getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 55d5a56..02841d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -220,6 +220,9 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSQLContext {
       if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, 
answer.sorted)
 
     } catch {
+      case a: AnalysisException if a.plan.nonEmpty =>
+        // Do not output the logical plan tree which contains expression IDs.
+        (StructType(Seq.empty), Seq(a.getClass.getName, a.getSimpleMessage))
       case NonFatal(e) =>
         // If there is an exception, put the exception class followed by the 
message.
         (StructType(Seq.empty), Seq(e.getClass.getName, e.getMessage))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-17751][SQL][BACKPORT-2.0] Remove spark.sql.eagerAnalysis and Output the Plan if Existed in AnalysisException

Reply via email to