Repository: spark
Updated Branches:
  refs/heads/master 780717367 -> d35690158

[SPARK-14284][ML] KMeansSummary deprecating size; adding clusterSizes

## What changes were proposed in this pull request?

KMeansSummary class : deprecated size and added clusterSizes

Author: Shally Sangal <shallysan...@gmail.com>

Closes #12084 from shallys/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d3569015
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d3569015
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d3569015

Branch: refs/heads/master
Commit: d35690158810465809679ef39548e1400b38d448
Parents: 7807173
Author: Shally Sangal <shallysan...@gmail.com>
Authored: Tue Apr 5 10:41:59 2016 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Tue Apr 5 10:41:59 2016 -0700

----------------------------------------------------------------------
 mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 3 ++-
 mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/d3569015/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 3842882..a8beef8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -299,7 +299,8 @@ class KMeansSummary private[clustering] (
    * Size of each cluster.
    */
   @Since("2.0.0")
-  lazy val size: Array[Int] = cluster.rdd.map {
+  lazy val clusterSizes: Array[Int] = cluster.rdd.map {
     case Row(clusterIdx: Int) => (clusterIdx, 1)
   }.reduceByKey(_ + _).collect().sortBy(_._1).map(_._2)
+
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/d3569015/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
index d3a0df4..ed735a4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/KMeansWrapper.scala
@@ -37,7 +37,7 @@ private[r] class KMeansWrapper private (

   lazy val k: Int = kMeansModel.getK

-  lazy val size: Array[Int] = kMeansModel.summary.size
+  lazy val size: Array[Int] = kMeansModel.summary.clusterSizes

   lazy val cluster: DataFrame = kMeansModel.summary.cluster

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org