Github user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22356#discussion_r215737907
  
    --- Diff: core/src/test/scala/org/apache/spark/FileSuite.scala ---
    @@ -299,6 +301,25 @@ class FileSuite extends SparkFunSuite with LocalSparkContext {
         }
       }
     
    +  test("SPARK-22357 test binaryFiles minPartitions") {
    +    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")
    +      .set("spark.files.openCostInBytes", "0")
    --- End diff ---
    
    Setting `spark.files.openCostInBytes` to 0 removes its effect in the section of code we're really trying to test:
    
    ```
    def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions: Int) {
      val defaultMaxSplitBytes = sc.getConf.get(config.FILES_MAX_PARTITION_BYTES)
      val openCostInBytes = sc.getConf.get(config.FILES_OPEN_COST_IN_BYTES)
      val defaultParallelism = Math.max(sc.defaultParallelism, minPartitions)
      val files = listStatus(context).asScala
      val totalBytes = files.filterNot(_.isDirectory).map(_.getLen + openCostInBytes).sum
      val bytesPerCore = totalBytes / defaultParallelism
      val maxSplitSize = Math.min(defaultMaxSplitBytes, Math.max(openCostInBytes, bytesPerCore))
      super.setMaxSplitSize(maxSplitSize)
    }
    ```
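    
    As a quick illustration, here is a minimal standalone sketch of the same arithmetic (hypothetical `OpenCostSketch`/`sketchMaxSplitSize` names and made-up file sizes, not Spark code): with the open cost at 0, the per-file padding in `totalBytes` disappears and so does the lower bound on `maxSplitSize`.
    
    ```
    // Standalone sketch of the arithmetic in setMinPartitions above
    // (hypothetical names and values, not Spark code).
    object OpenCostSketch {
      def sketchMaxSplitSize(fileLens: Seq[Long], openCostInBytes: Long,
          defaultMaxSplitBytes: Long, defaultParallelism: Long): Long = {
        // Each file length is padded by openCostInBytes before summing.
        val totalBytes = fileLens.map(_ + openCostInBytes).sum
        val bytesPerCore = totalBytes / defaultParallelism
        // openCostInBytes is also the floor on the split size.
        Math.min(defaultMaxSplitBytes, Math.max(openCostInBytes, bytesPerCore))
      }
    
      def main(args: Array[String]): Unit = {
        val lens = Seq(10L, 10L, 10L)              // three tiny files
        val maxPartitionBytes = 128L * 1024 * 1024
        // openCostInBytes = 0: no per-file padding, no floor on the split size.
        println(sketchMaxSplitSize(lens, 0L, maxPartitionBytes, 2L))                // 15
        // openCostInBytes = 4 MB: each file is padded, giving a much larger split size.
        println(sketchMaxSplitSize(lens, 4L * 1024 * 1024, maxPartitionBytes, 2L))  // 6291471
      }
    }
    ```
    
    Under those assumed numbers, the computed split size drops from 6291471 bytes (with a 4 MB open cost) to 15 bytes once the config is zeroed, i.e. the setting no longer influences the result at all.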


---
