Github user mridulm commented on a diff in the pull request: https://github.com/apache/spark/pull/20002#discussion_r158119432 --- Diff: core/src/main/scala/org/apache/spark/Partitioner.scala --- @@ -67,6 +71,16 @@ object Partitioner { } } } + + /** + * Returns true if the number of partitions of the RDD is either greater than or is + * less than and within a single order of magnitude of the max number of upstream partitions; + * otherwise, returns false + */ + private def isEligiblePartitioner(hasMaxPartitioner: RDD[_], rdds: Seq[RDD[_]]): Boolean = { + val maxPartitions = rdds.map(_.partitions.length).max + log10(maxPartitions).floor - log10(hasMaxPartitioner.getNumPartitions).floor < 1 --- End diff -- Why `.floor`? It causes an unnecessary discontinuity, in my opinion — for example, the pair (9, 11) will not satisfy the condition, but it should.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org