Repository: spark
Updated Branches:
  refs/heads/master ec96d34e7 -> 4acbda4a9
Revert "[SPARK-25764][ML][EXAMPLES] Update BisectingKMeans example to use ClusteringEvaluator"

This reverts commit d0ecff28545ac81f5ba7ac06957ced65b6e3ebcd.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4acbda4a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4acbda4a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4acbda4a

Branch: refs/heads/master
Commit: 4acbda4a96a5d6ef9065544631a3457e8d7b1748
Parents: ec96d34
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Sat Oct 20 09:28:53 2018 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Sat Oct 20 09:28:53 2018 +0800

----------------------------------------------------------------------
 .../spark/examples/ml/JavaBisectingKMeansExample.java | 12 +++---------
 .../src/main/python/ml/bisecting_k_means_example.py | 12 +++---------
 .../spark/examples/ml/BisectingKMeansExample.scala | 12 +++---------
 3 files changed, 9 insertions(+), 27 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/4acbda4a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java
index f517dc3..8c82aaa 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java
@@ -20,7 +20,6 @@ package org.apache.spark.examples.ml;
 // $example on$
 import org.apache.spark.ml.clustering.BisectingKMeans;
 import org.apache.spark.ml.clustering.BisectingKMeansModel;
-import org.apache.spark.ml.evaluation.ClusteringEvaluator;
 import org.apache.spark.ml.linalg.Vector;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
@@ -51,14 +50,9 @@ public class JavaBisectingKMeansExample {
     BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1);
     BisectingKMeansModel model = bkm.fit(dataset);
 
-    // Make predictions
-    Dataset<Row> predictions = model.transform(dataset);
-
-    // Evaluate clustering by computing Silhouette score
-    ClusteringEvaluator evaluator = new ClusteringEvaluator();
-
-    double silhouette = evaluator.evaluate(predictions);
-    System.out.println("Silhouette with squared euclidean distance = " + silhouette);
+    // Evaluate clustering.
+    double cost = model.computeCost(dataset);
+    System.out.println("Within Set Sum of Squared Errors = " + cost);
 
     // Shows the result.
     System.out.println("Cluster Centers: ");

http://git-wip-us.apache.org/repos/asf/spark/blob/4acbda4a/examples/src/main/python/ml/bisecting_k_means_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py
index 82adb33..7842d20 100644
--- a/examples/src/main/python/ml/bisecting_k_means_example.py
+++ b/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -24,7 +24,6 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.clustering import BisectingKMeans
-from pyspark.ml.evaluation import ClusteringEvaluator
 # $example off$
 
 from pyspark.sql import SparkSession
@@ -42,14 +41,9 @@ if __name__ == "__main__":
     bkm = BisectingKMeans().setK(2).setSeed(1)
     model = bkm.fit(dataset)
 
-    # Make predictions
-    predictions = model.transform(dataset)
-
-    # Evaluate clustering by computing Silhouette score
-    evaluator = ClusteringEvaluator()
-
-    silhouette = evaluator.evaluate(predictions)
-    print("Silhouette with squared euclidean distance = " + str(silhouette))
+    # Evaluate clustering.
+    cost = model.computeCost(dataset)
+    print("Within Set Sum of Squared Errors = " + str(cost))
 
     # Shows the result.
     print("Cluster Centers: ")

http://git-wip-us.apache.org/repos/asf/spark/blob/4acbda4a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
index 14e13df..5f8f2c9 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
@@ -21,7 +21,6 @@ package org.apache.spark.examples.ml
 
 // $example on$
 import org.apache.spark.ml.clustering.BisectingKMeans
-import org.apache.spark.ml.evaluation.ClusteringEvaluator
 // $example off$
 import org.apache.spark.sql.SparkSession
 
@@ -49,14 +48,9 @@ object BisectingKMeansExample {
     val bkm = new BisectingKMeans().setK(2).setSeed(1)
     val model = bkm.fit(dataset)
 
-    // Make predictions
-    val predictions = model.transform(dataset)
-
-    // Evaluate clustering by computing Silhouette score
-    val evaluator = new ClusteringEvaluator()
-
-    val silhouette = evaluator.evaluate(predictions)
-    println(s"Silhouette with squared euclidean distance = $silhouette")
+    // Evaluate clustering.
+    val cost = model.computeCost(dataset)
+    println(s"Within Set Sum of Squared Errors = $cost")
 
     // Shows the result.
     println("Cluster Centers: ")

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org