Updated Branches: refs/heads/master 97ac06018 -> 2fd781d34
Expose numPartitions parameter in JavaPairRDD.sortByKey() This change make Java and Scala API on sortByKey() the same. Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/5013fb64 Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/5013fb64 Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/5013fb64 Branch: refs/heads/master Commit: 5013fb64b27e46bbc5daf4f06fdc70938c06cf29 Parents: 6169fe1 Author: Binh Nguyen <ngb...@gmail.com> Authored: Tue Dec 10 00:38:16 2013 -0800 Committer: Binh Nguyen <ngb...@gmail.com> Committed: Tue Dec 10 00:38:16 2013 -0800 ---------------------------------------------------------------------- .../scala/org/apache/spark/api/java/JavaPairRDD.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5013fb64/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 2142fd7..a191dfd 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -583,12 +583,20 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in * order of the keys). */ - def sortByKey(comp: Comparator[K], ascending: Boolean): JavaPairRDD[K, V] = { + def sortByKey(comp: Comparator[K], ascending: Boolean): JavaPairRDD[K, V] = sortByKey(comp, ascending, rdd.partitions.size) + + /** + * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling + * `collect` or `save` on the resulting RDD will return or output an ordered list of records + * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in + * order of the keys). + */ + def sortByKey(comp: Comparator[K], ascending: Boolean, numPartitions: Int): JavaPairRDD[K, V] = { class KeyOrdering(val a: K) extends Ordered[K] { override def compare(b: K) = comp.compare(a, b) } implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x) - fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending)) + fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending, numPartitions)) } /**