This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3baa60afe25c [SPARK-44914][BUILD] Upgrade Apache Ivy to 2.5.2
3baa60afe25c is described below

commit 3baa60afe25c821ced1e956502f7c77b719f73dd
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Fri Feb 23 08:36:32 2024 -0800

    [SPARK-44914][BUILD] Upgrade Apache Ivy to 2.5.2
    
    ### What changes were proposed in this pull request?
    
    This PR aims to upgrade Apache Ivy to 2.5.2 and to protect old Ivy-based systems, such as older Spark versions, from Apache Ivy 2.5.2's incompatibility by introducing a new `.ivy2.5.2` directory.
    
    - Apache Spark 4.0.0 will create this directory once and reuse it, while all other systems, such as older Spark versions, keep using the old one, `.ivy2`. The behavior is therefore the same as if Apache Spark 4.0.0 were installed and used on a new machine.
    
    - For environments with user-provided Ivy paths, users might still hit the incompatibility. However, they can mitigate it themselves because they already have full control over those paths (see the sketch after this list).
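
    As a minimal sketch, the fallback this patch introduces resolves the default Ivy directory as follows (the snippet mirrors the `MavenUtils` change in the diff below; `ivy.home` and `user.home` are standard JVM system properties):

    ```scala
    import java.io.File

    // Prefer an explicit `ivy.home` system property; otherwise fall back to a
    // Spark-4.0-specific `~/.ivy2.5.2` directory, leaving the pre-2.5.2 `~/.ivy2`
    // layout used by older Ivy-based tools (such as old Spark) untouched.
    val alternateIvyDir: String = System.getProperty("ivy.home",
      System.getProperty("user.home") + File.separator + ".ivy2.5.2")
    ```

    Users who want the pre-4.0 layout back can point `spark.jars.ivy` at `~/.ivy2`, as noted in the migration guide entry in this patch.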
    
    ### Why are the changes needed?
    
    This upgrade was attempted once before and logically reverted due to Java 11 and Java 17 failures in the daily CIs.
    - #42613
    - #42668
    
    Currently, the PR Builder also fails. If this PR passes the CIs, we can achieve the following.
    
    - [Release notes](https://lists.apache.org/thread/9gcz4xrsn8c7o9gb377xfzvkb8jltffr)
        - FIX: CVE-2022-46751: Apache Ivy Is Vulnerable to XML External Entity Injections
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs including `HiveExternalCatalogVersionsSuite`.
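
    For reference, one way to run that suite locally is an sbt invocation along these lines (an assumed example, not taken from this patch; the module and fully qualified suite name may differ):

    ```bash
    # Assumed local command; adjust the module/suite path if needed.
    build/sbt "hive/testOnly org.apache.spark.sql.hive.HiveExternalCatalogVersionsSuite"
    ```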
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #45075 from dongjoon-hyun/SPARK-44914.
    
    Authored-by: Dongjoon Hyun <dh...@apple.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../main/scala/org/apache/spark/util/MavenUtils.scala   | 17 ++++++++++++++---
 .../test/scala/org/apache/spark/util/IvyTestUtils.scala |  3 ++-
 .../org/apache/spark/internal/config/package.scala      |  4 ++--
 dev/deps/spark-deps-hadoop-3-hive-2.3                   |  2 +-
 dev/run-tests.py                                        |  2 ++
 docs/core-migration-guide.md                            |  2 ++
 pom.xml                                                 |  6 +-----
 7 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala
index 65530b7fa473..08291859a32c 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala
@@ -324,6 +324,14 @@ private[spark] object MavenUtils extends Logging {
     val ivySettings: IvySettings = new IvySettings
     try {
       ivySettings.load(file)
+      if (ivySettings.getDefaultIvyUserDir == null && ivySettings.getDefaultCache == null) {
+        // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility.
+        // `processIvyPathArg` can overwrite these later.
+        val alternateIvyDir = System.getProperty("ivy.home",
+          System.getProperty("user.home") + File.separator + ".ivy2.5.2")
+        ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir))
+        ivySettings.setDefaultCache(new File(alternateIvyDir, "cache"))
+      }
     } catch {
       case e @ (_: IOException | _: ParseException) =>
        throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e)
@@ -335,10 +343,13 @@ private[spark] object MavenUtils extends Logging {
 
   /* Set ivy settings for location of cache, if option is supplied */
   private def processIvyPathArg(ivySettings: IvySettings, ivyPath: Option[String]): Unit = {
-    ivyPath.filterNot(_.trim.isEmpty).foreach { alternateIvyDir =>
-      ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir))
-      ivySettings.setDefaultCache(new File(alternateIvyDir, "cache"))
+    val alternateIvyDir = ivyPath.filterNot(_.trim.isEmpty).getOrElse {
+      // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility.
+      System.getProperty("ivy.home",
+        System.getProperty("user.home") + File.separator + ".ivy2.5.2")
     }
+    ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir))
+    ivySettings.setDefaultCache(new File(alternateIvyDir, "cache"))
   }
 
   /* Add any optional additional remote repositories */
diff --git a/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala b/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala
index 50312646bdb7..76062074edca 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/IvyTestUtils.scala
@@ -374,7 +374,8 @@ private[spark] object IvyTestUtils {
       f(repo.toURI.toString)
     } finally {
       // Clean up
-      if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) {
+      if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2") ||
+          repo.toString.contains(".ivy2.5.2")) {
         val groupDir = getBaseGroupDirectory(artifact, useIvyLayout)
        FileUtils.deleteDirectory(new File(repo, groupDir + File.separator + artifact.artifactId))
         deps.foreach { _.foreach { dep =>
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 7c8cfc9f208f..0b026a888e88 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -2491,10 +2491,10 @@ package object config {
       .doc("Path to specify the Ivy user directory, used for the local Ivy 
cache and " +
         "package files from spark.jars.packages. " +
         "This will override the Ivy property ivy.default.ivy.user.dir " +
-        "which defaults to ~/.ivy2.")
+        "which defaults to ~/.ivy2.5.2")
       .version("1.3.0")
       .stringConf
-      .createOptional
+      .createWithDefault("~/.ivy2.5.2")
 
   private[spark] val JAR_IVY_SETTING_PATH =
     ConfigBuilder(MavenUtils.JAR_IVY_SETTING_PATH_KEY)
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index 97205011e265..bac74d4214d4 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -102,7 +102,7 @@ httpcore/4.4.16//httpcore-4.4.16.jar
 icu4j/72.1//icu4j-72.1.jar
 ini4j/0.5.4//ini4j-0.5.4.jar
 istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
-ivy/2.5.1//ivy-2.5.1.jar
+ivy/2.5.2//ivy-2.5.2.jar
 jackson-annotations/2.16.1//jackson-annotations-2.16.1.jar
 jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar
 jackson-core/2.16.1//jackson-core-2.16.1.jar
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 3fe79af58d71..eb760139f9b6 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -478,6 +478,8 @@ def main():
     rm_r(os.path.join(SPARK_HOME, "work"))
     rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
     rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))
+    rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "local", "org.apache.spark"))
+    rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "cache", "org.apache.spark"))
 
     os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])
 
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index 26e6b0f1f444..3adfbeca8fd9 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -36,6 +36,8 @@ license: |
 
 - Since Spark 4.0, Spark uses `ReadWriteOncePod` instead of `ReadWriteOnce` access mode in persistence volume claims. To restore the legacy behavior, you can set `spark.kubernetes.legacy.useReadWriteOnceAccessMode` to `true`.
 
+- Since Spark 4.0, Spark uses `~/.ivy2.5.2` as the Ivy user directory by default to isolate existing systems from Apache Ivy's incompatibility. To restore the legacy behavior, you can set `spark.jars.ivy` to `~/.ivy2`.
+
 ## Upgrading from Core 3.4 to 3.5
 
 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.
diff --git a/pom.xml b/pom.xml
index 124552a42ff7..2ee31bd63650 100644
--- a/pom.xml
+++ b/pom.xml
@@ -146,11 +146,7 @@
     <jetty.version>10.0.19</jetty.version>
     <jakartaservlet.version>4.0.3</jakartaservlet.version>
     <chill.version>0.10.0</chill.version>
-    <!--
-      SPARK-44968: don't upgrade Ivy to version 2.5.2 until the test aborted of
-      `HiveExternalCatalogVersionsSuite` in Java 11/17 daily tests is resolved.
-    -->
-    <ivy.version>2.5.1</ivy.version>
+    <ivy.version>2.5.2</ivy.version>
     <oro.version>2.0.8</oro.version>
     <!--
     If you change codahale.metrics.version, you also need to change

