Github user yanboliang commented on a diff in the pull request: https://github.com/apache/spark/pull/19204#discussion_r139312199 --- Diff: python/pyspark/ml/evaluation.py --- @@ -328,6 +329,86 @@ def setParams(self, predictionCol="prediction", labelCol="label", kwargs = self._input_kwargs return self._set(**kwargs) + +@inherit_doc +class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol, + JavaMLReadable, JavaMLWritable): + """ + .. note:: Experimental + + Evaluator for Clustering results, which expects two input + columns: prediction and features. + + >>> from sklearn import datasets + >>> from pyspark.sql.types import * + >>> from pyspark.ml.linalg import Vectors, VectorUDT + >>> from pyspark.ml.evaluation import ClusteringEvaluator + ... + >>> iris = datasets.load_iris() + >>> iris_rows = [(Vectors.dense(x), int(iris.target[i])) + ... for i, x in enumerate(iris.data)] + >>> schema = StructType([ + ... StructField("features", VectorUDT(), True), + ... StructField("cluster_id", IntegerType(), True)]) + >>> rdd = spark.sparkContext.parallelize(iris_rows) + >>> dataset = spark.createDataFrame(rdd, schema) + ... + >>> evaluator = ClusteringEvaluator(predictionCol="cluster_id") + >>> evaluator.evaluate(dataset) + 0.656... + >>> ce_path = temp_path + "/ce" + >>> evaluator.save(ce_path) + >>> evaluator2 = ClusteringEvaluator.load(ce_path) + >>> str(evaluator2.getPredictionCol()) + 'cluster_id' + + .. 
versionadded:: 2.3.0 + """ + metricName = Param(Params._dummy(), "metricName", + "metric name in evaluation (silhouette)", + typeConverter=TypeConverters.toString) + + @keyword_only + def __init__(self, predictionCol="prediction", featuresCol="features", + metricName="silhouette"): + """ + __init__(self, predictionCol="prediction", featuresCol="features", \ + metricName="silhouette") + """ + super(ClusteringEvaluator, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.evaluation.ClusteringEvaluator", self.uid) + self._setDefault(predictionCol="prediction", featuresCol="features", --- End diff -- Remove the default values set here for ```predictionCol``` and ```featuresCol```; they are already set by ```HasPredictionCol``` and ```HasFeaturesCol```.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org