This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6c3b7f9 [SPARK-35074][CORE] hardcoded configs move to config package 6c3b7f9 is described below commit 6c3b7f92cfaf4d11c8c9c984082ea40bd1f86abd Author: dgd-contributor <dgd_contribu...@viettel.com.vn> AuthorDate: Mon Jun 7 09:55:03 2021 -0500 [SPARK-35074][CORE] hardcoded configs move to config package ### What changes were proposed in this pull request? Currently spark.jars.xxx property keys (e.g. spark.jars.ivySettings and spark.jars.packages) are hardcoded in multiple places within Spark code across multiple modules. We should define them in config/package.scala and reference them in all other places. ### Why are the changes needed? improvement ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? no Closes #32746 from dgd-contributor/SPARK-35074_configs_should_be_moved_to_config_package.scala. Authored-by: dgd-contributor <dgd_contribu...@viettel.com.vn> Signed-off-by: Thomas Graves <tgra...@apache.org> --- .../org/apache/spark/deploy/SparkSubmit.scala | 15 +++--- .../apache/spark/deploy/SparkSubmitArguments.scala | 14 +++--- .../org/apache/spark/internal/config/package.scala | 56 ++++++++++++++++++++++ .../org/apache/spark/util/DependencyUtils.scala | 11 +++-- 4 files changed, 77 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 568bcf9..a65be54 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -588,7 +588,8 @@ private[spark] class SparkSubmit extends Logging { OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = SUBMIT_DEPLOY_MODE.key), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = "spark.app.name"), - OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, confKey = "spark.jars.ivy"), + OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, + confKey = JAR_IVY_REPO_PATH.key), OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, confKey = DRIVER_MEMORY.key), OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, @@ -605,13 +606,13 @@ private[spark] class SparkSubmit extends Logging { // Propagate attributes for dependency resolution at the driver side OptionAssigner(args.packages, STANDALONE | MESOS | KUBERNETES, - CLUSTER, confKey = "spark.jars.packages"), + CLUSTER, confKey = JAR_PACKAGES.key), OptionAssigner(args.repositories, STANDALONE | MESOS | KUBERNETES, - CLUSTER, confKey = "spark.jars.repositories"), + CLUSTER, confKey = JAR_REPOSITORIES.key), OptionAssigner(args.ivyRepoPath, STANDALONE | MESOS | KUBERNETES, - CLUSTER, confKey = "spark.jars.ivy"), + CLUSTER, confKey = JAR_IVY_REPO_PATH.key), OptionAssigner(args.packagesExclusions, STANDALONE | MESOS | KUBERNETES, - CLUSTER, confKey = "spark.jars.excludes"), + CLUSTER, confKey = JAR_PACKAGES_EXCLUSIONS.key), // Yarn only OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.queue"), @@ -646,7 +647,7 @@ private[spark] class SparkSubmit extends Logging { confKey = DRIVER_CORES.key), OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER, confKey = DRIVER_SUPERVISE.key), - OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = "spark.jars.ivy"), + OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, confKey = JAR_IVY_REPO_PATH.key), // An internal option used only for spark-shell to add user jars to repl's classloader, // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to @@ -1299,7 +1300,7 @@ private[spark] object SparkSubmitUtils extends Logging { val file = Option(uri.getScheme).getOrElse("file") match { case "file" => new File(uri.getPath) case scheme => throw new IllegalArgumentException(s"Scheme $scheme not supported in " + - "spark.jars.ivySettings") + JAR_IVY_SETTING_PATH.key) } require(file.exists(), s"Ivy settings file $file does not exist") require(file.isFile(), s"Ivy settings file $file is not a normal file") diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 692e7ea..47fbab5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -185,13 +185,13 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S files = Option(files).orElse(sparkProperties.get(config.FILES.key)).orNull archives = Option(archives).orElse(sparkProperties.get(config.ARCHIVES.key)).orNull pyFiles = Option(pyFiles).orElse(sparkProperties.get(config.SUBMIT_PYTHON_FILES.key)).orNull - ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull - ivySettingsPath = sparkProperties.get("spark.jars.ivySettings") - packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull + ivyRepoPath = sparkProperties.get(config.JAR_IVY_REPO_PATH.key).orNull + ivySettingsPath = sparkProperties.get(config.JAR_IVY_SETTING_PATH.key) + packages = Option(packages).orElse(sparkProperties.get(config.JAR_PACKAGES.key)).orNull packagesExclusions = Option(packagesExclusions) - .orElse(sparkProperties.get("spark.jars.excludes")).orNull + .orElse(sparkProperties.get(config.JAR_PACKAGES_EXCLUSIONS.key)).orNull repositories = Option(repositories) - .orElse(sparkProperties.get("spark.jars.repositories")).orNull + .orElse(sparkProperties.get(config.JAR_REPOSITORIES.key)).orNull deployMode = Option(deployMode) .orElse(sparkProperties.get(config.SUBMIT_DEPLOY_MODE.key)) .orElse(env.get("DEPLOY_MODE")) @@ -200,11 +200,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S .getOrElse(sparkProperties.get(config.EXECUTOR_INSTANCES.key).orNull) queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull keytab = Option(keytab) - .orElse(sparkProperties.get("spark.kerberos.keytab")) + .orElse(sparkProperties.get(config.KEYTAB.key)) .orElse(sparkProperties.get("spark.yarn.keytab")) .orNull principal = Option(principal) - .orElse(sparkProperties.get("spark.kerberos.principal")) + .orElse(sparkProperties.get(config.PRINCIPAL.key)) .orElse(sparkProperties.get("spark.yarn.principal")) .orNull dynamicAllocationEnabled = diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index f8d6753..9574416 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2148,4 +2148,60 @@ package object config { // batch of block will be loaded in memory with memory mapping, which has higher overhead // with small MB sized chunk of data. .createWithDefaultString("3m") + + private[spark] val JAR_IVY_REPO_PATH = + ConfigBuilder("spark.jars.ivy") + .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " + + "package files from spark.jars.packages. " + + "This will override the Ivy property ivy.default.ivy.user.dir " + + "which defaults to ~/.ivy2.") + .version("1.3.0") + .stringConf + .createOptional + + private[spark] val JAR_IVY_SETTING_PATH = + ConfigBuilder("spark.jars.ivySettings") + .doc("Path to an Ivy settings file to customize resolution of jars specified " + + "using spark.jars.packages instead of the built-in defaults, such as maven central. " + + "Additional repositories given by the command-line option --repositories " + + "or spark.jars.repositories will also be included. " + + "Useful for allowing Spark to resolve artifacts from behind a firewall " + + "e.g. via an in-house artifact server like Artifactory. " + + "Details on the settings file format can be found at Settings Files") + .version("2.2.0") + .stringConf + .createOptional + + private[spark] val JAR_PACKAGES = + ConfigBuilder("spark.jars.packages") + .doc("Comma-separated list of Maven coordinates of jars to include " + + "on the driver and executor classpaths. The coordinates should be " + + "groupId:artifactId:version. If spark.jars.ivySettings is given artifacts " + + "will be resolved according to the configuration in the file, otherwise artifacts " + + "will be searched for in the local maven repo, then maven central and finally " + + "any additional remote repositories given by the command-line option --repositories. " + + "For more details, see Advanced Dependency Management.") + .version("1.5.0") + .stringConf + .toSequence + .createWithDefault(Nil) + + private[spark] val JAR_PACKAGES_EXCLUSIONS = + ConfigBuilder("spark.jars.excludes") + .doc("Comma-separated list of groupId:artifactId, " + + "to exclude while resolving the dependencies provided in spark.jars.packages " + + "to avoid dependency conflicts.") + .version("1.5.0") + .stringConf + .toSequence + .createWithDefault(Nil) + + private[spark] val JAR_REPOSITORIES = + ConfigBuilder("spark.jars.repositories") + .doc("Comma-separated list of additional remote repositories to search " + + "for the maven coordinates given with --packages or spark.jars.packages.") + .version("2.3.0") + .stringConf + .toSequence + .createWithDefault(Nil) } diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala index f7135edd..da8ea4f 100644 --- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ case class IvyProperties( packagesExclusions: String, @@ -39,11 +40,11 @@ private[spark] object DependencyUtils extends Logging { def getIvyProperties(): IvyProperties = { val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq( - "spark.jars.excludes", - "spark.jars.packages", - "spark.jars.repositories", - "spark.jars.ivy", - "spark.jars.ivySettings" + JAR_PACKAGES_EXCLUSIONS.key, + JAR_PACKAGES.key, + JAR_REPOSITORIES.key, + JAR_IVY_REPO_PATH.key, + JAR_IVY_SETTING_PATH.key ).map(sys.props.get(_).orNull) IvyProperties(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org