This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new fb9c633  [SPARK-38631][CORE] Uses Java-based implementation for un-tarring at Utils.unpack
fb9c633 is described below

commit fb9c633db950d7af70376ff1d076d36297e21c36
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Thu Mar 24 12:12:38 2022 +0900

    [SPARK-38631][CORE] Uses Java-based implementation for un-tarring at Utils.unpack

    ### What changes were proposed in this pull request?

    This PR proposes to use `FileUtil.unTarUsingJava`, a Java implementation for un-tarring `.tar` files. `unTarUsingJava` is not public, but it exists in all Hadoop versions from 2.1+ (see HADOOP-9264).

    Reproducing the security issue requires a non-Windows platform and a non-gzipped TAR archive file name (the contents don't matter).

    ### Why are the changes needed?

    There is a risk of arbitrary shell command injection via `Utils.unpack` when the file name is controlled by a malicious user. This is due to an issue in Hadoop's `unTar`, which does not properly escape the file name before passing it to a shell command: https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java#L904

    ### Does this PR introduce _any_ user-facing change?

    Yes, it fixes a security issue that previously allowed users to execute arbitrary shell commands.

    ### How was this patch tested?

    Manually tested locally; existing test cases should cover this.

    Closes #35946 from HyukjinKwon/SPARK-38631.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 057c051285ec32c665fb458d0670c1c16ba536b2)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../main/scala/org/apache/spark/util/Utils.scala | 30 ++++++++++++++++++++
 1 file changed, 28 insertions(+), 2 deletions(-)
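For context on the risk described above, here is a minimal, hypothetical sketch of how an unescaped, attacker-controlled file name interpolated into a shell command can inject extra commands. This is not Hadoop's actual `unTar` code; the `bash -c` command string, the object name, and the method name are assumed purely for illustration.

import scala.sys.process._

object ShellUnTarSketch {
  // Hypothetical shell-based untar: the file name is spliced straight into the
  // command line. A crafted name such as "x.tar; touch /tmp/pwned" turns the
  // single tar invocation into two shell commands, and the injected one runs too.
  def naiveUnTar(fileName: String, destDir: String): Int =
    Seq("bash", "-c", s"cd $destDir && tar -xf $fileName").!
}

The patch avoids this class of problem for plain `.tar` files by not spawning a shell at all: it reflectively invokes Hadoop's non-public `FileUtil.unTarUsingJava`, which unpacks the archive with Java I/O, as shown in the diff that follows.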
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index c8c7ea6..24b3d2b 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -600,16 +600,42 @@ private[spark] object Utils extends Logging {
     if (lowerSrc.endsWith(".jar")) {
       RunJar.unJar(source, dest, RunJar.MATCH_ANY)
     } else if (lowerSrc.endsWith(".zip")) {
+      // TODO(SPARK-37677): should keep file permissions. Java implementation doesn't.
       FileUtil.unZip(source, dest)
-    } else if (
-      lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz") || lowerSrc.endsWith(".tar")) {
+    } else if (lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz")) {
       FileUtil.unTar(source, dest)
+    } else if (lowerSrc.endsWith(".tar")) {
+      // TODO(SPARK-38632): should keep file permissions. Java implementation doesn't.
+      unTarUsingJava(source, dest)
     } else {
       logWarning(s"Cannot unpack $source, just copying it to $dest.")
       copyRecursive(source, dest)
     }
   }
 
+  /**
+   * The method below was copied from `FileUtil.unTar` but uses Java-based implementation
+   * to work around a security issue, see also SPARK-38631.
+   */
+  private def unTarUsingJava(source: File, dest: File): Unit = {
+    if (!dest.mkdirs && !dest.isDirectory) {
+      throw new IOException(s"Mkdirs failed to create $dest")
+    } else {
+      try {
+        // Should not fail because all Hadoop 2.1+ (from HADOOP-9264)
+        // have 'unTarUsingJava'.
+        val mth = classOf[FileUtil].getDeclaredMethod(
+          "unTarUsingJava", classOf[File], classOf[File], classOf[Boolean])
+        mth.setAccessible(true)
+        mth.invoke(null, source, dest, java.lang.Boolean.FALSE)
+      } catch {
+        // Re-throw the original exception.
+        case e: java.lang.reflect.InvocationTargetException if e.getCause != null =>
+          throw e.getCause
+      }
+    }
+  }
+
   /** Records the duration of running `body`. */
   def timeTakenMs[T](body: => T): (T, Long) = {
     val startTime = System.nanoTime()

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org