Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/19464#discussion_r144353093 --- Diff: core/src/test/scala/org/apache/spark/FileSuite.scala --- @@ -510,4 +510,54 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { } } + test("spark.files.ignoreEmptySplits work correctly (old Hadoop API)") { + val conf = new SparkConf() + conf.setAppName("test").setMaster("local").set(IGNORE_EMPTY_SPLITS, true) + sc = new SparkContext(conf) + + def testIgnoreEmptySplits(data: Array[Tuple2[String, String]], numSlices: Int, + outputSuffix: Int, checkPart: String, partitionLength: Int): Unit = { + val dataRDD = sc.parallelize(data, numSlices) + val output = new File(tempDir, "output" + outputSuffix) + dataRDD.saveAsHadoopFile[TextOutputFormat[String, String]](output.getPath) + assert(new File(output, checkPart).exists() === true) + val hadoopRDD = sc.textFile(new File(output, "part-*").getPath) + assert(hadoopRDD.partitions.length === partitionLength) + } + + // Ensure that if all of the splits are empty, we remove the splits correctly + testIgnoreEmptySplits(Array.empty[Tuple2[String, String]], 1, 0, "part-00000", 0) --- End diff -- I'd call it with named arguments, for example, ```scala testIgnoreEmptySplits( Array.empty[Tuple2[String, String]], numSlices = 1, outputSuffix = 0, checkPart = "part-00000", expectedPartitionNum = 0) ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org