spark git commit: [SPARK-22357][CORE][FOLLOWUP] SparkContext.binaryFiles ignore minPartitions parameter

srowen Thu, 06 Sep 2018 21:44:10 -0700

Repository: spark
Updated Branches:
  refs/heads/master b0ada7dce -> 4e3365b57



[SPARK-22357][CORE][FOLLOWUP] SparkContext.binaryFiles ignore minPartitions parameter

## What changes were proposed in this pull request?

This adds a test following https://github.com/apache/spark/pull/21638

## How was this patch tested?

Existing tests and new test.

Closes #22356 from srowen/SPARK-22357.2.

Authored-by: Sean Owen <sean.o...@databricks.com>
Signed-off-by: Sean Owen <sean.o...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e3365b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e3365b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e3365b5

Branch: refs/heads/master
Commit: 4e3365b577fbc9021fa237ea4e8792f5aea5d80c
Parents: b0ada7d
Author: Sean Owen <sean.o...@databricks.com>
Authored: Thu Sep 6 21:43:14 2018 -0700
Committer: Sean Owen <sean.o...@databricks.com>
Committed: Thu Sep 6 21:43:14 2018 -0700

----------------------------------------------------------------------
 .../test/scala/org/apache/spark/FileSuite.scala | 21 ++++++++++++++++++++
 1 file changed, 21 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4e3365b5/core/src/test/scala/org/apache/spark/FileSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala
index a441b9c..81b18c7 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -19,10 +19,12 @@ package org.apache.spark
 
 import java.io._
 import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets
 import java.util.zip.GZIPOutputStream
 
 import scala.io.Source
 
+import com.google.common.io.Files
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.io._
@@ -299,6 +301,25 @@ class FileSuite extends SparkFunSuite with LocalSparkContext {
     }
   }
 
+  test("SPARK-22357 test binaryFiles minPartitions") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")
+      .set("spark.files.openCostInBytes", "0")
+      .set("spark.default.parallelism", "1"))
+
+    val tempDir = Utils.createTempDir()
+    val tempDirPath = tempDir.getAbsolutePath
+
+    for (i <- 0 until 8) {
+      val tempFile = new File(tempDir, s"part-0000$i")
+      Files.write("someline1 in file1\nsomeline2 in file1\nsomeline3 in file1", tempFile,
+        StandardCharsets.UTF_8)
+    }
+
+    for (p <- Seq(1, 2, 8)) {
+      assert(sc.binaryFiles(tempDirPath, minPartitions = p).getNumPartitions === p)
+    }
+  }
+
   test("fixed record length binary file as byte array") {
     sc = new SparkContext("local", "test")
     val testOutput = Array[Byte](1, 2, 3, 4, 5, 6)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-22357][CORE][FOLLOWUP] SparkContext.binaryFiles ignore minPartitions parameter

Reply via email to