Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/19464#discussion_r144353093 --- Diff: core/src/test/scala/org/apache/spark/FileSuite.scala --- @@ -510,4 +510,54 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { } } + test("spark.files.ignoreEmptySplits work correctly (old Hadoop API)") { + val conf = new SparkConf() + conf.setAppName("test").setMaster("local").set(IGNORE_EMPTY_SPLITS, true) + sc = new SparkContext(conf) + + def testIgnoreEmptySplits(data: Array[Tuple2[String, String]], numSlices: Int, + outputSuffix: Int, checkPart: String, partitionLength: Int): Unit = { + val dataRDD = sc.parallelize(data, numSlices) + val output = new File(tempDir, "output" + outputSuffix) + dataRDD.saveAsHadoopFile[TextOutputFormat[String, String]](output.getPath) + assert(new File(output, checkPart).exists() === true) + val hadoopRDD = sc.textFile(new File(output, "part-*").getPath) + assert(hadoopRDD.partitions.length === partitionLength) + } + + // Ensure that if all of the splits are empty, we remove the splits correctly + testIgnoreEmptySplits(Array.empty[Tuple2[String, String]], 1, 0, "part-00000", 0) --- End diff -- I'd call it with named arguments, for example, ```scala testIgnoreEmptySplits( Array.empty[Tuple2[String, String]], numSlices = 1, outputSuffix = 0, checkPart = "part-00000", expectedPartitionNum = 0) ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org