Github user jiangxb1987 commented on a diff in the pull request: https://github.com/apache/spark/pull/19464#discussion_r144031728 --- Diff: core/src/test/scala/org/apache/spark/FileSuite.scala --- @@ -510,4 +510,16 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { } } + test("spark.hadoop.filterOutEmptySplit") { + val sf = new SparkConf() + sf.setAppName("test").setMaster("local").set("spark.hadoop.filterOutEmptySplit", "true") + sc = new SparkContext(sf) + val emptyRDD = sc.parallelize(Array.empty[Tuple2[String, String]], 1) + emptyRDD.saveAsHadoopFile[TextOutputFormat[String, String]](tempDir.getPath + "/output") + assert(new File(tempDir.getPath + "/output/part-00000").exists() === true) + + val hadoopRDD = sc.textFile(tempDir.getPath + "/output/part-00000") --- End diff -- We should also add the following test cases: 1. Ensure that if no split is empty, we don't lose any splits; 2. Ensure that if only some of the splits are empty, we remove exactly the empty splits and keep the non-empty ones.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org