This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3baa60afe25c [SPARK-44914][BUILD] Upgrade Apache Ivy to 2.5.2 3baa60afe25c is described below commit 3baa60afe25c821ced1e956502f7c77b719f73dd Author: Dongjoon Hyun <dh...@apple.com> AuthorDate: Fri Feb 23 08:36:32 2024 -0800 [SPARK-44914][BUILD] Upgrade Apache Ivy to 2.5.2 ### What changes were proposed in this pull request? This PR aims to upgrade Apache Ivy to 2.5.2 and protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility by introducing a new `.ivy2.5.2` directory. - Apache Spark 4.0.0 will create this once and reuse this directory while all the other systems like old Sparks use the old one, `.ivy2`. So, the behavior is the same as the case where Apache Spark 4.0.0 is installed and used on a new machine. - For the environments with `User-provided Ivy-path`es, the user might still hit the incompatibility. However, the users can mitigate them because they already have full control over the `Ivy-path`es. ### Why are the changes needed? This was tried once and reverted logically due to Java 11 and Java 17 failures in Daily CIs. - #42613 - #42668 Currently, the PR Builder also fails. If the PR passes CIs, we can achieve the following. - [Release notes](https://lists.apache.org/thread/9gcz4xrsn8c7o9gb377xfzvkb8jltffr) - FIX: CVE-2022-46751: Apache Ivy Is Vulnerable to XML External Entity Injections ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs including `HiveExternalCatalogVersionsSuite`. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45075 from dongjoon-hyun/SPARK-44914. 
Authored-by: Dongjoon Hyun <dh...@apple.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../main/scala/org/apache/spark/util/MavenUtils.scala | 17 ++++++++++++++--- .../test/scala/org/apache/spark/util/IvyTestUtils.scala | 3 ++- .../org/apache/spark/internal/config/package.scala | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- dev/run-tests.py | 2 ++ docs/core-migration-guide.md | 2 ++ pom.xml | 6 +----- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala index 65530b7fa473..08291859a32c 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala @@ -324,6 +324,14 @@ private[spark] object MavenUtils extends Logging { val ivySettings: IvySettings = new IvySettings try { ivySettings.load(file) + if (ivySettings.getDefaultIvyUserDir == null && ivySettings.getDefaultCache == null) { + // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility. + // `processIvyPathArg` can overwrite these later. 
+ val alternateIvyDir = System.getProperty("ivy.home", + System.getProperty("user.home") + File.separator + ".ivy2.5.2") + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + } } catch { case e @ (_: IOException | _: ParseException) => throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e) @@ -335,10 +343,13 @@ private[spark] object MavenUtils extends Logging { /* Set ivy settings for location of cache, if option is supplied */ private def processIvyPathArg(ivySettings: IvySettings, ivyPath: Option[String]): Unit = { - ivyPath.filterNot(_.trim.isEmpty).foreach { alternateIvyDir => - ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) - ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + val alternateIvyDir = ivyPath.filterNot(_.trim.isEmpty).getOrElse { + // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility. + System.getProperty("ivy.home", + System.getProperty("user.home") + File.separator + ".ivy2.5.2") } + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) } /* Add any optional additional remote repositories */ diff --git a/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala b/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala index 50312646bdb7..76062074edca 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala @@ -374,7 +374,8 @@ private[spark] object IvyTestUtils { f(repo.toURI.toString) } finally { // Clean up - if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) { + if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2") || + repo.toString.contains(".ivy2.5.2")) { val groupDir = getBaseGroupDirectory(artifact, useIvyLayout) FileUtils.deleteDirectory(new 
File(repo, groupDir + File.separator + artifact.artifactId)) deps.foreach { _.foreach { dep => diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 7c8cfc9f208f..0b026a888e88 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2491,10 +2491,10 @@ package object config { .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " + "package files from spark.jars.packages. " + "This will override the Ivy property ivy.default.ivy.user.dir " + - "which defaults to ~/.ivy2.") + "which defaults to ~/.ivy2.5.2") .version("1.3.0") .stringConf - .createOptional + .createWithDefault("~/.ivy2.5.2") private[spark] val JAR_IVY_SETTING_PATH = ConfigBuilder(MavenUtils.JAR_IVY_SETTING_PATH_KEY) diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 97205011e265..bac74d4214d4 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -102,7 +102,7 @@ httpcore/4.4.16//httpcore-4.4.16.jar icu4j/72.1//icu4j-72.1.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.5.1//ivy-2.5.1.jar +ivy/2.5.2//ivy-2.5.2.jar jackson-annotations/2.16.1//jackson-annotations-2.16.1.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.16.1//jackson-core-2.16.1.jar diff --git a/dev/run-tests.py b/dev/run-tests.py index 3fe79af58d71..eb760139f9b6 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -478,6 +478,8 @@ def main(): rm_r(os.path.join(SPARK_HOME, "work")) rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark")) rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark")) + rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "local", "org.apache.spark")) + rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "cache", 
"org.apache.spark")) os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"]) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 26e6b0f1f444..3adfbeca8fd9 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -36,6 +36,8 @@ license: | - Since Spark 4.0, Spark uses `ReadWriteOncePod` instead of `ReadWriteOnce` access mode in persistence volume claims. To restore the legacy behavior, you can set `spark.kubernetes.legacy.useReadWriteOnceAccessMode` to `true`. +- Since Spark 4.0, Spark uses `~/.ivy2.5.2` as Ivy user directory by default to isolate the existing systems from Apache Ivy's incompatibility. To restore the legacy behavior, you can set `spark.jars.ivy` to `~/.ivy2`. + ## Upgrading from Core 3.4 to 3.5 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead. diff --git a/pom.xml b/pom.xml index 124552a42ff7..2ee31bd63650 100644 --- a/pom.xml +++ b/pom.xml @@ -146,11 +146,7 @@ <jetty.version>10.0.19</jetty.version> <jakartaservlet.version>4.0.3</jakartaservlet.version> <chill.version>0.10.0</chill.version> - <!-- - SPARK-44968: don't upgrade Ivy to version 2.5.2 until the test aborted of - `HiveExternalCatalogVersionsSuite` in Java 11/17 daily tests is resolved. - --> - <ivy.version>2.5.1</ivy.version> + <ivy.version>2.5.2</ivy.version> <oro.version>2.0.8</oro.version> <!-- If you change codahale.metrics.version, you also need to change --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org