Github user mridulm commented on a diff in the pull request: https://github.com/apache/spark/pull/20091#discussion_r162778240 --- Diff: core/src/test/scala/org/apache/spark/PartitioningSuite.scala --- @@ -284,7 +284,38 @@ class PartitioningSuite extends SparkFunSuite with SharedSparkContext with Priva assert(partitioner3.numPartitions == rdd3.getNumPartitions) assert(partitioner4.numPartitions == rdd3.getNumPartitions) assert(partitioner5.numPartitions == rdd4.getNumPartitions) + } + test("defaultPartitioner when defaultParallelism is set") { + assert(!sc.conf.contains("spark.default.parallelism")) + try { + sc.conf.set("spark.default.parallelism", "4") + + val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 150) + val rdd2 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + .partitionBy(new HashPartitioner(10)) + val rdd3 = sc.parallelize(Array((1, 6), (7, 8), (3, 10), (5, 12), (13, 14))) + .partitionBy(new HashPartitioner(100)) + val rdd4 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + .partitionBy(new HashPartitioner(9)) + val rdd5 = sc.parallelize((1 to 10).map(x => (x, x)), 11) --- End diff -- Can we add a case where an existing partitioner is not used and the default (from spark.default.parallelism) gets used instead? For example, something like the following pseudo-code: ``` val rdd6 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))).partitionBy(new HashPartitioner(3)) ... Partitioner.defaultPartitioner(rdd1, rdd6).numPartitions == sc.conf.get("spark.default.parallelism").toInt ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org