This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new fb9c633  [SPARK-38631][CORE] Uses Java-based implementation for 
un-tarring at Utils.unpack
fb9c633 is described below

commit fb9c633db950d7af70376ff1d076d36297e21c36
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Thu Mar 24 12:12:38 2022 +0900

    [SPARK-38631][CORE] Uses Java-based implementation for un-tarring at 
Utils.unpack
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to use `FileUtil.unTarUsingJava` that is a Java 
implementation for un-tar `.tar` files. `unTarUsingJava` is not public but it 
exists in all Hadoop versions from 2.1+, see HADOOP-9264.
    
    The security issue reproduction requires a non-Windows platform, and a 
non-gzipped TAR archive file name (contents don't matter).
    
    ### Why are the changes needed?
    
    There is a risk for arbitrary shell command injection via `Utils.unpack` 
when the filename is controlled by a malicious user. This is due to an issue in 
Hadoop's `unTar`, that is not properly escaping the filename before passing to 
a shell 
command:https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java#L904
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it prevents a security issue that, previously, allowed users to 
execute arbitrary shall command.
    
    ### How was this patch tested?
    
    Manually tested in local, and existing test cases should cover.
    
    Closes #35946 from HyukjinKwon/SPARK-38631.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 057c051285ec32c665fb458d0670c1c16ba536b2)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../main/scala/org/apache/spark/util/Utils.scala   | 30 ++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index c8c7ea6..24b3d2b 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -600,16 +600,42 @@ private[spark] object Utils extends Logging {
     if (lowerSrc.endsWith(".jar")) {
       RunJar.unJar(source, dest, RunJar.MATCH_ANY)
     } else if (lowerSrc.endsWith(".zip")) {
+      // TODO(SPARK-37677): should keep file permissions. Java implementation 
doesn't.
       FileUtil.unZip(source, dest)
-    } else if (
-      lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz") || 
lowerSrc.endsWith(".tar")) {
+    } else if (lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz")) {
       FileUtil.unTar(source, dest)
+    } else if (lowerSrc.endsWith(".tar")) {
+      // TODO(SPARK-38632): should keep file permissions. Java implementation 
doesn't.
+      unTarUsingJava(source, dest)
     } else {
       logWarning(s"Cannot unpack $source, just copying it to $dest.")
       copyRecursive(source, dest)
     }
   }
 
+  /**
+   * The method below was copied from `FileUtil.unTar` but uses Java-based 
implementation
+   * to work around a security issue, see also SPARK-38631.
+   */
+  private def unTarUsingJava(source: File, dest: File): Unit = {
+    if (!dest.mkdirs && !dest.isDirectory) {
+      throw new IOException(s"Mkdirs failed to create $dest")
+    } else {
+      try {
+        // Should not fail because all Hadoop 2.1+ (from HADOOP-9264)
+        // have 'unTarUsingJava'.
+        val mth = classOf[FileUtil].getDeclaredMethod(
+          "unTarUsingJava", classOf[File], classOf[File], classOf[Boolean])
+        mth.setAccessible(true)
+        mth.invoke(null, source, dest, java.lang.Boolean.FALSE)
+      } catch {
+        // Re-throw the original exception.
+        case e: java.lang.reflect.InvocationTargetException if e.getCause != 
null =>
+          throw e.getCause
+      }
+    }
+  }
+
   /** Records the duration of running `body`. */
   def timeTakenMs[T](body: => T): (T, Long) = {
     val startTime = System.nanoTime()

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to