Repository: spark Updated Branches: refs/heads/master 1c5b19827 -> 82a396c2f
[SPARK-7910] [TINY] [JAVAAPI] expose partitioner information in javardd Author: Holden Karau <hol...@pigscanfly.ca> Closes #6464 from holdenk/SPARK-7910-expose-partitioner-information-in-javardd and squashes the following commits: de1e644 [Holden Karau] Fix the test to get the partitioner bdb31cc [Holden Karau] Add Mima exclude for the new method 347ef4c [Holden Karau] Add a quick little test for the partitioner JavaAPI f49dca9 [Holden Karau] Add partitoner information to JavaRDDLike and fix some whitespace Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82a396c2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82a396c2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82a396c2 Branch: refs/heads/master Commit: 82a396c2f594bade276606dcd0c0545a650fb838 Parents: 1c5b198 Author: Holden Karau <hol...@pigscanfly.ca> Authored: Fri May 29 14:59:18 2015 -0700 Committer: Josh Rosen <joshro...@databricks.com> Committed: Fri May 29 14:59:18 2015 -0700 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/api/java/JavaRDDLike.scala | 9 ++++++--- core/src/test/java/org/apache/spark/JavaAPISuite.java | 2 ++ project/MimaExcludes.scala | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/82a396c2/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index b8e15f3..c95615a 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -60,10 +60,13 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { @deprecated("Use partitions() instead.", "1.1.0") def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq) - + /** Set of partitions in this RDD. */ def partitions: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq) + /** The partitioner of this RDD. */ + def partitioner: Optional[Partitioner] = JavaUtils.optionToOptional(rdd.partitioner) + /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */ def context: SparkContext = rdd.context @@ -492,9 +495,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { new java.util.ArrayList(arr) } - def takeSample(withReplacement: Boolean, num: Int): JList[T] = + def takeSample(withReplacement: Boolean, num: Int): JList[T] = takeSample(withReplacement, num, Utils.random.nextLong) - + def takeSample(withReplacement: Boolean, num: Int, seed: Long): JList[T] = { import scala.collection.JavaConversions._ val arr: java.util.Collection[T] = rdd.takeSample(withReplacement, num, seed).toSeq http://git-wip-us.apache.org/repos/asf/spark/blob/82a396c2/core/src/test/java/org/apache/spark/JavaAPISuite.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index c2089b0..dfd86d3 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -212,6 +212,8 @@ public class JavaAPISuite implements Serializable { JavaPairRDD<Integer, Integer> repartitioned = rdd.repartitionAndSortWithinPartitions(partitioner); + Assert.assertTrue(repartitioned.partitioner().isPresent()); + Assert.assertEquals(repartitioned.partitioner().get(), partitioner); List<List<Tuple2<Integer, Integer>>> partitions = repartitioned.glom().collect(); Assert.assertEquals(partitions.get(0), Arrays.asList(new Tuple2<Integer, Integer>(0, 5), new Tuple2<Integer, Integer>(0, 8), new Tuple2<Integer, Integer>(2, 6))); http://git-wip-us.apache.org/repos/asf/spark/blob/82a396c2/project/MimaExcludes.scala ---------------------------------------------------------------------- diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 11b439e..8da72b3 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -38,6 +38,8 @@ object MimaExcludes { Seq( MimaBuild.excludeSparkPackage("deploy"), MimaBuild.excludeSparkPackage("ml"), + // SPARK-7910 Adding a method to get the partioner to JavaRDD, + ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitioner"), // SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff"), // These are needed if checking against the sbt build, since they are part of --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org