spark git commit: [SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8f7138859 -> b3678eb7e


[SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState

## What changes were proposed in this pull request?

Before this patch, once a SparkSession had been created, Hadoop conf set directly on SparkContext.hadoopConfiguration did not affect the Hadoop conf created by SessionState. This patch always uses SparkContext.hadoopConfiguration as the base. It also changes the behavior of the hive-site.xml support added in https://github.com/apache/spark/pull/12689/: hive-site.xml is now loaded into SparkContext.hadoopConfiguration itself.

## How was this patch tested?

New test in SparkSessionBuilderSuite.

Author: Yin Huai

Closes #13711 from yhuai/SPARK-15991.

(cherry picked from commit d9c6628c47de547dc537310e3c775c7f3e0e4a12)
Signed-off-by: Shixiong Zhu

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3678eb7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3678eb7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3678eb7

Branch: refs/heads/branch-2.0
Commit: b3678eb7e4ac6bb08ba8579867944ba42da99b81
Parents: 8f71388
Author: Yin Huai
Authored: Thu Jun 16 17:06:24 2016 -0700
Committer: Shixiong Zhu
Committed: Thu Jun 16 17:06:30 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/internal/SessionState.scala       |  2 +-
 .../apache/spark/sql/internal/SharedState.scala | 14 ++++----------
 .../org/apache/spark/sql/SQLQuerySuite.scala    |  4 ----
 .../spark/sql/SparkSessionBuilderSuite.scala    | 20 ++++++++++++++++++++
 .../apache/spark/sql/hive/HiveSharedState.scala |  5 +++--
 5 files changed, 28 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 59efa81..dc95123 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -49,7 +49,7 @@ private[sql] class SessionState(sparkSession: SparkSession) {
   lazy val conf: SQLConf = new SQLConf
 
   def newHadoopConf(): Configuration = {
-    val hadoopConf = new Configuration(sparkSession.sharedState.hadoopConf)
+    val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
     conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
     hadoopConf
   }
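The new SparkSessionBuilderSuite test is not preserved in this truncated message. A minimal sketch of the behavior it needs to pin down, with an illustrative key name rather than the suite's actual contents (note that `sessionState` is `private[sql]`, so this only compiles from code inside the `org.apache.spark.sql` package, as the suite is):

```scala
import org.apache.spark.sql.SparkSession

val session = SparkSession.builder().appName("SPARK-15991-sketch").master("local").getOrCreate()
try {
  // Mutate the global Hadoop configuration *after* the session exists.
  session.sparkContext.hadoopConfiguration.set("my.test.key", "my.test.value")
  // With this patch, newHadoopConf() copies SparkContext.hadoopConfiguration
  // and overlays the session's SQLConf entries, so the freshly set key is visible.
  assert(session.sessionState.newHadoopConf().get("my.test.key") == "my.test.value")
} finally {
  session.stop()
}
```

Before the patch, the copy was taken from the snapshot held by SharedState.hadoopConf, so a key set on the context after session creation was silently dropped.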
http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index bc349b4..6c43fe3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -43,23 +43,17 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
    */
   val listener: SQLListener = createListenerAndUI(sparkContext)
 
-  /**
-   * The base hadoop configuration which is shared among all spark sessions. It is based on the
-   * default hadoop configuration of Spark, with custom configurations inside `hive-site.xml`.
-   */
-  val hadoopConf: Configuration = {
-    val conf = new Configuration(sparkContext.hadoopConfiguration)
+  {
     val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
     if (configFile != null) {
-      conf.addResource(configFile)
+      sparkContext.hadoopConfiguration.addResource(configFile)
     }
-    conf
   }
 
   /**
    * A catalog that interacts with external systems.
    */
-  lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(hadoopConf)
+  lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(sparkContext.hadoopConfiguration)
 
   /**
    * A classloader used to load all user-added jar.
@@ -71,7 +65,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
     // Set the Hive metastore warehouse path to the one we use
     val tempConf = new SQLConf
     sparkContext.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) }
-    val hiveWarehouseDir = hadoopConf.get("hive.metastore.warehouse.dir")
+    val hiveWarehouseDir = sparkContext.hadoopConfiguration.get("hive.metastore.warehouse.dir")
     if (hiveWarehouseDir != null && !tempConf.contains(SQLConf.WAREHOUSE_PATH.key)) {
       // If hive.metastore.warehouse.dir is set and spark.sql.warehouse.dir is not set,
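For context on the relocated hive-site.xml block above: the patch relies on plain Hadoop Configuration resource merging, now applied to SparkContext.hadoopConfiguration itself rather than to a separate copy. A standalone sketch of those semantics (the helper name is ours for illustration, not part of Spark's API):

```scala
import org.apache.hadoop.conf.Configuration

// Illustrative helper mirroring the block in SharedState above.
def loadHiveSiteInto(hadoopConf: Configuration, loader: ClassLoader): Unit = {
  // getResource returns null when hive-site.xml is not on the classpath;
  // the configuration is then left untouched, matching the null guard in the diff.
  val configFile = loader.getResource("hive-site.xml")
  if (configFile != null) {
    // addResource merges the XML file's properties into the configuration.
    // Values already set programmatically via set() keep precedence over
    // resource values, so spark.hadoop.* settings are not clobbered.
    hadoopConf.addResource(configFile)
  }
}
```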
spark git commit: [SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState
Repository: spark
Updated Branches:
  refs/heads/master 62d2fa5e9 -> d9c6628c4

This is the master commit from which the branch-2.0 commit above was cherry-picked; its pull request description and diff are identical to those shown above.

Author: Yin Huai

Closes #13711 from yhuai/SPARK-15991.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d9c6628c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d9c6628c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d9c6628c

Branch: refs/heads/master
Commit: d9c6628c47de547dc537310e3c775c7f3e0e4a12
Parents: 62d2fa5
Author: Yin Huai
Authored: Thu Jun 16 17:06:24 2016 -0700
Committer: Shixiong Zhu
Committed: Thu Jun 16 17:06:24 2016 -0700