This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6d74557c385 [SPARK-39102][CORE][SQL][DSTREAM] Add checkstyle rules to disabled use of Guava's `Files.createTempDir()` 6d74557c385 is described below commit 6d74557c385d487fb50b5603d9c322d0ae9d9b74 Author: yangjie01 <yangji...@baidu.com> AuthorDate: Tue May 17 08:44:29 2022 -0500 [SPARK-39102][CORE][SQL][DSTREAM] Add checkstyle rules to disabled use of Guava's `Files.createTempDir()` ### What changes were proposed in this pull request? The main change of this pr as follows: - Add a checkstyle to `scalastyle-config.xml` to disabled use of Guava's `Files.createTempDir()` for Scala - Add a checkstyle to `dev/checkstyle.xml` to disabled use of Guava's `Files.createTempDir()` for Java - Introduce `JavaUtils.createTempDir()` method to replace the use of Guava's `Files.createTempDir()` in Java code - Use `Utils.createTempDir()` to replace the use of Guava's `Files.createTempDir()` in Scala code ### Why are the changes needed? Avoid the use of Guava's `Files.createTempDir()` in Spark code due to [CVE-2020-8908](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8908) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #36529 from LuciferYang/SPARK-39102. Authored-by: yangjie01 <yangji...@baidu.com> Signed-off-by: Sean Owen <sro...@gmail.com> --- .../org/apache/spark/network/util/JavaUtils.java | 56 +++++++++++++++++++++ .../org/apache/spark/network/StreamTestHelper.java | 4 +- .../apache/spark/network/util/JavaUtilsSuite.java | 58 ++++++++++++++++++++++ .../network/shuffle/ExternalBlockHandlerSuite.java | 4 +- .../network/shuffle/TestShuffleDataContext.java | 5 +- .../test/org/apache/spark/Java8RDDAPISuite.java | 7 +-- .../java/test/org/apache/spark/JavaAPISuite.java | 5 +- .../org/apache/spark/deploy/IvyTestUtils.scala | 3 +- .../apache/spark/deploy/RPackageUtilsSuite.scala | 3 +- dev/checkstyle.xml | 6 +++ scalastyle-config.xml | 7 +++ .../org/apache/spark/streaming/JavaAPISuite.java | 9 ++-- 12 files changed, 147 insertions(+), 20 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java index b5497087634..f699bdd95c3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -21,7 +21,9 @@ import java.io.*; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.Locale; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -362,6 +364,60 @@ public class JavaUtils { } } + /** + * Create a temporary directory inside `java.io.tmpdir` with default namePrefix "spark". + * The directory will be automatically deleted when the VM shuts down. + */ + public static File createTempDir() throws IOException { + return createTempDir(System.getProperty("java.io.tmpdir"), "spark"); + } + + /** + * Create a temporary directory inside the given parent directory. The directory will be + * automatically deleted when the VM shuts down. + */ + public static File createTempDir(String root, String namePrefix) throws IOException { + if (root == null) root = System.getProperty("java.io.tmpdir"); + if (namePrefix == null) namePrefix = "spark"; + File dir = createDirectory(root, namePrefix); + dir.deleteOnExit(); + return dir; + } + + /** + * Create a directory inside the given parent directory with default namePrefix "spark". + * The directory is guaranteed to be newly created, and is not marked for automatic deletion. + */ + public static File createDirectory(String root) throws IOException { + return createDirectory(root, "spark"); + } + + /** + * Create a directory inside the given parent directory. The directory is guaranteed to be + * newly created, and is not marked for automatic deletion. + */ + public static File createDirectory(String root, String namePrefix) throws IOException { + if (namePrefix == null) namePrefix = "spark"; + int attempts = 0; + int maxAttempts = 10; + File dir = null; + while (dir == null) { + attempts += 1; + if (attempts > maxAttempts) { + throw new IOException("Failed to create a temp directory (under " + root + ") after " + + maxAttempts + " attempts!"); + } + try { + dir = new File(root, namePrefix + "-" + UUID.randomUUID()); + Files.createDirectories(dir.toPath()); + } catch (IOException | SecurityException e) { + logger.error("Failed to create directory " + dir, e); + dir = null; + } + } + return dir.getCanonicalFile(); + } + /** * Fills a buffer with data read from the channel. */ diff --git a/common/network-common/src/test/java/org/apache/spark/network/StreamTestHelper.java b/common/network-common/src/test/java/org/apache/spark/network/StreamTestHelper.java index b93cad4652a..3ba6a585653 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/StreamTestHelper.java +++ b/common/network-common/src/test/java/org/apache/spark/network/StreamTestHelper.java @@ -23,8 +23,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Random; -import com.google.common.io.Files; - import org.apache.spark.network.buffer.FileSegmentManagedBuffer; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NioManagedBuffer; @@ -51,7 +49,7 @@ class StreamTestHelper { } StreamTestHelper() throws Exception { - tempDir = Files.createTempDir(); + tempDir = JavaUtils.createTempDir(); emptyBuffer = createBuffer(0); smallBuffer = createBuffer(100); largeBuffer = createBuffer(100000); diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java new file mode 100644 index 00000000000..0a80b12c12a --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.network.util; + +import java.io.File; +import java.io.IOException; + +import org.junit.Test; + +import static org.junit.Assert.*; + +public class JavaUtilsSuite { + + @Test + public void testCreateDirectory() throws IOException { + File tmpDir = new File(System.getProperty("java.io.tmpdir")); + File testDir = new File(tmpDir, "createDirectory" + System.nanoTime()); + String testDirPath = testDir.getCanonicalPath(); + + // 1. Directory created successfully + assertTrue(JavaUtils.createDirectory(testDirPath, "scenario1").exists()); + + // 2. Illegal file path + StringBuilder namePrefix = new StringBuilder(); + for (int i = 0; i < 256; i++) { + namePrefix.append("scenario2"); + } + assertThrows(IOException.class, + () -> JavaUtils.createDirectory(testDirPath, namePrefix.toString())); + + // 3. The parent directory cannot read + assertTrue(testDir.canRead()); + assertTrue(testDir.setReadable(false)); + assertTrue(JavaUtils.createDirectory(testDirPath, "scenario3").exists()); + assertTrue(testDir.setReadable(true)); + + // 4. The parent directory cannot write + assertTrue(testDir.canWrite()); + assertTrue(testDir.setWritable(false)); + assertThrows(IOException.class, + () -> JavaUtils.createDirectory(testDirPath, "scenario4")); + assertTrue(testDir.setWritable(true)); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java index 18efc10df7e..f681af71e40 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java @@ -28,7 +28,6 @@ import com.codahale.metrics.Meter; import com.codahale.metrics.Metric; import com.codahale.metrics.Timer; import com.google.common.io.ByteStreams; -import com.google.common.io.Files; import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; @@ -60,6 +59,7 @@ import org.apache.spark.network.shuffle.protocol.OpenBlocks; import org.apache.spark.network.shuffle.protocol.RegisterExecutor; import org.apache.spark.network.shuffle.protocol.StreamHandle; import org.apache.spark.network.shuffle.protocol.UploadBlock; +import org.apache.spark.network.util.JavaUtils; public class ExternalBlockHandlerSuite { TransportClient client = mock(TransportClient.class); @@ -126,7 +126,7 @@ public class ExternalBlockHandlerSuite { int reduceId = 0; // prepare the checksum file - File tmpDir = Files.createTempDir(); + File tmpDir = JavaUtils.createTempDir(); File checksumFile = new File(tmpDir, "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId + ".checksum." + algorithm); DataOutputStream out = new DataOutputStream(new FileOutputStream(checksumFile)); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index bcf57ea6219..5661504d621 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.io.OutputStream; import com.google.common.io.Closeables; -import com.google.common.io.Files; import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.util.JavaUtils; @@ -47,9 +46,9 @@ public class TestShuffleDataContext { this.subDirsPerLocalDir = subDirsPerLocalDir; } - public void create() { + public void create() throws IOException { for (int i = 0; i < localDirs.length; i ++) { - localDirs[i] = Files.createTempDir().getAbsolutePath(); + localDirs[i] = JavaUtils.createTempDir().getAbsolutePath(); for (int p = 0; p < subDirsPerLocalDir; p ++) { new File(localDirs[i], String.format("%02x", p)).mkdirs(); diff --git a/core/src/test/java/test/org/apache/spark/Java8RDDAPISuite.java b/core/src/test/java/test/org/apache/spark/Java8RDDAPISuite.java index 1d2b05ebc25..51758979fc5 100644 --- a/core/src/test/java/test/org/apache/spark/Java8RDDAPISuite.java +++ b/core/src/test/java/test/org/apache/spark/Java8RDDAPISuite.java @@ -18,13 +18,14 @@ package test.org.apache.spark; import java.io.File; +import java.io.IOException; import java.io.Serializable; import java.util.*; +import org.apache.spark.network.util.JavaUtils; import scala.Tuple2; import com.google.common.collect.Iterables; -import com.google.common.io.Files; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; @@ -244,8 +245,8 @@ public class Java8RDDAPISuite implements Serializable { } @Test - public void sequenceFile() { - File tempDir = Files.createTempDir(); + public void sequenceFile() throws IOException { + File tempDir = JavaUtils.createTempDir(); tempDir.deleteOnExit(); String outputDir = new File(tempDir, "output").getAbsolutePath(); List<Tuple2<Integer, String>> pairs = Arrays.asList( diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java index cba43d949ad..5c8529d5135 100644 --- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java @@ -39,6 +39,7 @@ import org.apache.spark.Partitioner; import org.apache.spark.SparkConf; import org.apache.spark.TaskContext; import org.apache.spark.TaskContext$; +import org.apache.spark.network.util.JavaUtils; import scala.Tuple2; import scala.Tuple3; import scala.Tuple4; @@ -90,9 +91,9 @@ public class JavaAPISuite implements Serializable { private transient File tempDir; @Before - public void setUp() { + public void setUp() throws IOException { sc = new JavaSparkContext("local", "JavaAPISuite"); - tempDir = Files.createTempDir(); + tempDir = JavaUtils.createTempDir(); tempDir.deleteOnExit(); } diff --git a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala index b986be03e96..0dcdba3dfb8 100644 --- a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala +++ b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala @@ -30,6 +30,7 @@ import org.apache.ivy.core.settings.IvySettings import org.apache.spark.TestUtils.{createCompiledClass, JavaSourceFromString} import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate +import org.apache.spark.util.Utils private[deploy] object IvyTestUtils { @@ -294,7 +295,7 @@ private[deploy] object IvyTestUtils { withPython: Boolean = false, withR: Boolean = false): File = { // Where the root of the repository exists, and what Ivy will search in - val tempPath = tempDir.getOrElse(Files.createTempDir()) + val tempPath = tempDir.getOrElse(Utils.createTempDir()) // Create directory if it doesn't exist Files.createParentDirs(tempPath) // Where to create temporary class files and such diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala index d04d9b6dcb2..57269d62593 100644 --- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala @@ -26,7 +26,6 @@ import java.util.zip.ZipFile import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfterEach @@ -144,7 +143,7 @@ class RPackageUtilsSuite } test("SparkR zipping works properly") { - val tempDir = Files.createTempDir() + val tempDir = Utils.createTempDir() Utils.tryWithSafeFinally { IvyTestUtils.writeFile(tempDir, "test.R", "abc") val fakeSparkRDir = new File(tempDir, "SparkR") diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 39b7b158cef..343eaa4cfda 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -174,6 +174,12 @@ <property name="format" value="new (java\.lang\.)?(Byte|Integer|Long|Short)\("/> <property name="message" value="Use static factory 'valueOf' or 'parseXXX' instead of the deprecated constructors." /> </module> + <module name="RegexpSinglelineJava"> + <property name="format" value="Files\.createTempDir\("/> + <property name="message" + value="Avoid using com.google.common.io.Files.createTempDir() due to CVE-2020-8908. + Use org.apache.spark.network.util.JavaUtils.createTempDir() instead." /> + </module> <module name="IllegalImport"> <property name="illegalPkgs" value="org.apache.log4j" /> </module> diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 9585785835d..4e2da275694 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -430,4 +430,11 @@ This file is divided into 3 sections: <parameters><parameter name="regex">Objects.toStringHelper</parameter></parameters> <customMessage>Avoid using Object.toStringHelper. Use ToStringBuilder instead.</customMessage> </check> + + <check customId="GuavaFilesCreateTempDir" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">Files\.createTempDir\(</parameter></parameters> + <customMessage>Avoid using com.google.common.io.Files.createTempDir due to CVE-2020-8908. + Use org.apache.spark.util.Utils.createTempDir instead. + </customMessage> + </check> </scalastyle> diff --git a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java index 41c4bf9e711..3bc704661ba 100644 --- a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.streaming.Duration; import org.apache.spark.streaming.JavaCheckpointTestUtils; import org.apache.spark.streaming.JavaTestUtils; @@ -1463,7 +1464,7 @@ public class JavaAPISuite extends LocalJavaStreamingContext implements Serializa } @Test - public void testCheckpointMasterRecovery() throws InterruptedException { + public void testCheckpointMasterRecovery() throws InterruptedException, IOException { List<List<String>> inputData = Arrays.asList( Arrays.asList("this", "is"), Arrays.asList("a", "test"), @@ -1475,7 +1476,7 @@ public class JavaAPISuite extends LocalJavaStreamingContext implements Serializa Arrays.asList(1,4), Arrays.asList(8,7)); - File tempDir = Files.createTempDir(); + File tempDir = JavaUtils.createTempDir(); tempDir.deleteOnExit(); ssc.checkpoint(tempDir.getAbsolutePath()); @@ -1498,7 +1499,7 @@ public class JavaAPISuite extends LocalJavaStreamingContext implements Serializa } @Test - public void testContextGetOrCreate() throws InterruptedException { + public void testContextGetOrCreate() throws IOException { ssc.stop(); SparkConf conf = new SparkConf() @@ -1506,7 +1507,7 @@ public class JavaAPISuite extends LocalJavaStreamingContext implements Serializa .setAppName("test") .set("newContext", "true"); - File emptyDir = Files.createTempDir(); + File emptyDir = JavaUtils.createTempDir(); emptyDir.deleteOnExit(); StreamingContextSuite contextSuite = new StreamingContextSuite(); String corruptedCheckpointDir = contextSuite.createCorruptedCheckpoint(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org