This is an automated email from the ASF dual-hosted git repository.

tgraves pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c3b7f9  [SPARK-35074][CORE] hardcoded configs move to config package
6c3b7f9 is described below

commit 6c3b7f92cfaf4d11c8c9c984082ea40bd1f86abd
Author: dgd-contributor <dgd_contribu...@viettel.com.vn>
AuthorDate: Mon Jun 7 09:55:03 2021 -0500

    [SPARK-35074][CORE] hardcoded configs move to config package
    
    ### What changes were proposed in this pull request?
    Currently spark.jars.xxx property keys (e.g. spark.jars.ivySettings and spark.jars.packages) are hardcoded in multiple places within Spark code across multiple modules. We should define them in config/package.scala and reference them in all other places.
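    
    For illustration, the pattern this change applies is to define each key once as a ConfigEntry in config/package.scala and reference its .key at the call sites; a minimal Scala sketch based on the entries added in the diff below:
    
        // In the org.apache.spark.internal.config package object:
        private[spark] val JAR_IVY_REPO_PATH =
          ConfigBuilder("spark.jars.ivy")
            .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " +
              "package files from spark.jars.packages.")
            .version("1.3.0")
            .stringConf
            .createOptional
    
        // At a call site (e.g. SparkSubmitArguments), reference the entry's key
        // instead of repeating the hardcoded string:
        ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull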
    
    ### Why are the changes needed?
    Code improvement: defining the spark.jars.* keys once in config/package.scala removes duplicated hardcoded strings across modules.
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    No new tests were added; the change only replaces hardcoded config strings with the equivalent constants.
    
    Closes #32746 from dgd-contributor/SPARK-35074_configs_should_be_moved_to_config_package.scala.
    
    Authored-by: dgd-contributor <dgd_contribu...@viettel.com.vn>
    Signed-off-by: Thomas Graves <tgra...@apache.org>
---
 .../org/apache/spark/deploy/SparkSubmit.scala      | 15 +++---
 .../apache/spark/deploy/SparkSubmitArguments.scala | 14 +++---
 .../org/apache/spark/internal/config/package.scala | 56 ++++++++++++++++++++++
 .../org/apache/spark/util/DependencyUtils.scala    | 11 +++--
 4 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 568bcf9..a65be54 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -588,7 +588,8 @@ private[spark] class SparkSubmit extends Logging {
       OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
         confKey = SUBMIT_DEPLOY_MODE.key),
      OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = "spark.app.name"),
-      OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, confKey = "spark.jars.ivy"),
+      OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT,
+        confKey = JAR_IVY_REPO_PATH.key),
       OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT,
         confKey = DRIVER_MEMORY.key),
      OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
@@ -605,13 +606,13 @@ private[spark] class SparkSubmit extends Logging {
 
       // Propagate attributes for dependency resolution at the driver side
       OptionAssigner(args.packages, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.packages"),
+        CLUSTER, confKey = JAR_PACKAGES.key),
       OptionAssigner(args.repositories, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.repositories"),
+        CLUSTER, confKey = JAR_REPOSITORIES.key),
       OptionAssigner(args.ivyRepoPath, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.ivy"),
+        CLUSTER, confKey = JAR_IVY_REPO_PATH.key),
       OptionAssigner(args.packagesExclusions, STANDALONE | MESOS | KUBERNETES,
-        CLUSTER, confKey = "spark.jars.excludes"),
+        CLUSTER, confKey = JAR_PACKAGES_EXCLUSIONS.key),
 
       // Yarn only
      OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.queue"),
@@ -646,7 +647,7 @@ private[spark] class SparkSubmit extends Logging {
         confKey = DRIVER_CORES.key),
       OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER,
         confKey = DRIVER_SUPERVISE.key),
-      OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = "spark.jars.ivy"),
+      OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = JAR_IVY_REPO_PATH.key),
 
      // An internal option used only for spark-shell to add user jars to repl's classloader,
      // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to
@@ -1299,7 +1300,7 @@ private[spark] object SparkSubmitUtils extends Logging {
     val file = Option(uri.getScheme).getOrElse("file") match {
       case "file" => new File(uri.getPath)
      case scheme => throw new IllegalArgumentException(s"Scheme $scheme not supported in " +
-        "spark.jars.ivySettings")
+        JAR_IVY_SETTING_PATH.key)
     }
     require(file.exists(), s"Ivy settings file $file does not exist")
     require(file.isFile(), s"Ivy settings file $file is not a normal file")
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 692e7ea..47fbab5 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -185,13 +185,13 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     files = Option(files).orElse(sparkProperties.get(config.FILES.key)).orNull
    archives = Option(archives).orElse(sparkProperties.get(config.ARCHIVES.key)).orNull
    pyFiles = Option(pyFiles).orElse(sparkProperties.get(config.SUBMIT_PYTHON_FILES.key)).orNull
-    ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull
-    ivySettingsPath = sparkProperties.get("spark.jars.ivySettings")
-    packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
+    ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull
+    ivySettingsPath = sparkProperties.get(config.JAR_IVY_SETTING_PATH.key)
+    packages = Option(packages).orElse(sparkProperties.get(config.JAR_PACKAGES.key)).orNull
     packagesExclusions = Option(packagesExclusions)
-      .orElse(sparkProperties.get("spark.jars.excludes")).orNull
+      .orElse(sparkProperties.get(config.JAR_PACKAGES_EXCLUSIONS.key)).orNull
     repositories = Option(repositories)
-      .orElse(sparkProperties.get("spark.jars.repositories")).orNull
+      .orElse(sparkProperties.get(config.JAR_REPOSITORIES.key)).orNull
     deployMode = Option(deployMode)
       .orElse(sparkProperties.get(config.SUBMIT_DEPLOY_MODE.key))
       .orElse(env.get("DEPLOY_MODE"))
@@ -200,11 +200,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       .getOrElse(sparkProperties.get(config.EXECUTOR_INSTANCES.key).orNull)
    queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
     keytab = Option(keytab)
-      .orElse(sparkProperties.get("spark.kerberos.keytab"))
+      .orElse(sparkProperties.get(config.KEYTAB.key))
       .orElse(sparkProperties.get("spark.yarn.keytab"))
       .orNull
     principal = Option(principal)
-      .orElse(sparkProperties.get("spark.kerberos.principal"))
+      .orElse(sparkProperties.get(config.PRINCIPAL.key))
       .orElse(sparkProperties.get("spark.yarn.principal"))
       .orNull
     dynamicAllocationEnabled =
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index f8d6753..9574416 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -2148,4 +2148,60 @@ package object config {
      // batch of block will be loaded in memory with memory mapping, which has higher overhead
       // with small MB sized chunk of data.
       .createWithDefaultString("3m")
+
+  private[spark] val JAR_IVY_REPO_PATH =
+    ConfigBuilder("spark.jars.ivy")
+      .doc("Path to specify the Ivy user directory, used for the local Ivy 
cache and " +
+        "package files from spark.jars.packages. " +
+        "This will override the Ivy property ivy.default.ivy.user.dir " +
+        "which defaults to ~/.ivy2.")
+      .version("1.3.0")
+      .stringConf
+      .createOptional
+
+  private[spark] val JAR_IVY_SETTING_PATH =
+    ConfigBuilder("spark.jars.ivySettings")
+      .doc("Path to an Ivy settings file to customize resolution of jars 
specified " +
+        "using spark.jars.packages instead of the built-in defaults, such as 
maven central. " +
+        "Additional repositories given by the command-line option 
--repositories " +
+        "or spark.jars.repositories will also be included. " +
+        "Useful for allowing Spark to resolve artifacts from behind a firewall 
" +
+        "e.g. via an in-house artifact server like Artifactory. " +
+        "Details on the settings file format can be found at Settings Files")
+      .version("2.2.0")
+      .stringConf
+      .createOptional
+
+  private[spark] val JAR_PACKAGES =
+    ConfigBuilder("spark.jars.packages")
+      .doc("Comma-separated list of Maven coordinates of jars to include " +
+        "on the driver and executor classpaths. The coordinates should be " +
+        "groupId:artifactId:version. If spark.jars.ivySettings is given 
artifacts " +
+        "will be resolved according to the configuration in the file, 
otherwise artifacts " +
+        "will be searched for in the local maven repo, then maven central and 
finally " +
+        "any additional remote repositories given by the command-line option 
--repositories. " +
+        "For more details, see Advanced Dependency Management.")
+      .version("1.5.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
+
+  private[spark] val JAR_PACKAGES_EXCLUSIONS =
+    ConfigBuilder("spark.jars.excludes")
+      .doc("Comma-separated list of groupId:artifactId, " +
+        "to exclude while resolving the dependencies provided in 
spark.jars.packages " +
+        "to avoid dependency conflicts.")
+      .version("1.5.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
+
+  private[spark] val JAR_REPOSITORIES =
+    ConfigBuilder("spark.jars.repositories")
+      .doc("Comma-separated list of additional remote repositories to search " 
+
+        "for the maven coordinates given with --packages or 
spark.jars.packages.")
+      .version("2.3.0")
+      .stringConf
+      .toSequence
+      .createWithDefault(Nil)
 }
diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
index f7135edd..da8ea4f 100644
--- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.deploy.SparkSubmitUtils
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
 
 case class IvyProperties(
     packagesExclusions: String,
@@ -39,11 +40,11 @@ private[spark] object DependencyUtils extends Logging {
 
   def getIvyProperties(): IvyProperties = {
    val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq(
-      "spark.jars.excludes",
-      "spark.jars.packages",
-      "spark.jars.repositories",
-      "spark.jars.ivy",
-      "spark.jars.ivySettings"
+      JAR_PACKAGES_EXCLUSIONS.key,
+      JAR_PACKAGES.key,
+      JAR_REPOSITORIES.key,
+      JAR_IVY_REPO_PATH.key,
+      JAR_IVY_SETTING_PATH.key
     ).map(sys.props.get(_).orNull)
    IvyProperties(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath)
   }
