Repository: spark Updated Branches: refs/heads/master 85ccee81a -> e8a5d50a9
[SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames Author: Reynold Xin <r...@databricks.com> Closes #4408 from rxin/df-config-eager and squashes the following commits: c0204cf [Reynold Xin] [SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e8a5d50a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e8a5d50a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e8a5d50a Branch: refs/heads/master Commit: e8a5d50a96f6e7d4fce33ea19fbfc083f4351296 Parents: 85ccee8 Author: Reynold Xin <r...@databricks.com> Authored: Thu Feb 5 18:07:10 2015 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Thu Feb 5 18:07:10 2015 -0800 ---------------------------------------------------------------------- .../scala/org/apache/spark/sql/DataFrameImpl.scala | 4 +++- .../main/scala/org/apache/spark/sql/SQLConf.scala | 6 ++++++ .../org/apache/spark/sql/DataFrameSuite.scala | 17 ++++++++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e8a5d50a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala index 58d1175..4911443 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala @@ -53,7 +53,9 @@ private[sql] class DataFrameImpl protected[sql]( def this(sqlContext: SQLContext, logicalPlan: LogicalPlan) = { this(sqlContext, { val qe = sqlContext.executePlan(logicalPlan) - qe.analyzed // This should force analysis and throw errors if there are any + if (sqlContext.conf.dataFrameEagerAnalysis) { + qe.analyzed // This should force analysis and throw errors if there are any + } qe }) } http://git-wip-us.apache.org/repos/asf/spark/blob/e8a5d50a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 5ef3bb0..180f5e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -52,6 +52,9 @@ private[spark] object SQLConf { // This is used to set the default data source val DEFAULT_DATA_SOURCE_NAME = "spark.sql.default.datasource" + // Whether to perform eager analysis on a DataFrame. + val DATAFRAME_EAGER_ANALYSIS = "spark.sql.dataframe.eagerAnalysis" + object Deprecated { val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks" } @@ -173,6 +176,9 @@ private[sql] class SQLConf extends Serializable { private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet") + private[spark] def dataFrameEagerAnalysis: Boolean = + getConf(DATAFRAME_EAGER_ANALYSIS, "true").toBoolean + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ http://git-wip-us.apache.org/repos/asf/spark/blob/e8a5d50a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 74c2945..77fd316 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -17,19 +17,23 @@ package org.apache.spark.sql +import scala.language.postfixOps + import org.apache.spark.sql.Dsl._ import org.apache.spark.sql.types._ - -/* Implicits */ +import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery import org.apache.spark.sql.test.TestSQLContext.implicits._ -import scala.language.postfixOps class DataFrameSuite extends QueryTest { import org.apache.spark.sql.TestData._ test("analysis error should be eagerly reported") { + val oldSetting = TestSQLContext.conf.dataFrameEagerAnalysis + // Eager analysis. + TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "true") + intercept[Exception] { testData.select('nonExistentName) } intercept[Exception] { testData.groupBy('key).agg(Map("nonExistentName" -> "sum")) @@ -40,6 +44,13 @@ class DataFrameSuite extends QueryTest { intercept[Exception] { testData.groupBy($"abcd").agg(Map("key" -> "sum")) } + + // No more eager analysis once the flag is turned off + TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "false") + testData.select('nonExistentName) + + // Set the flag back to original value before this test. + TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, oldSetting.toString) } test("table scan") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org