GitHub user smurching commented on a diff in the pull request: https://github.com/apache/spark/pull/19186#discussion_r138139729 --- Diff: mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala --- @@ -300,20 +300,23 @@ class KMeans @Since("1.5.0") ( @Since("1.5.0") def setSeed(value: Long): this.type = set(seed, value) + /** @group setParam */ + @Since("2.3.0") + def setHandlePersistence(value: Boolean): this.type = set(handlePersistence, value) + @Since("2.0.0") override def fit(dataset: Dataset[_]): KMeansModel = { transformSchema(dataset.schema, logging = true) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE val instances: RDD[OldVector] = dataset.select(col($(featuresCol))).rdd.map { case Row(point: Vector) => OldVectors.fromML(point) } - if (handlePersistence) { + if ($(handlePersistence)) { --- End diff -- See the comment above: we should also check that `dataset.storageLevel == StorageLevel.NONE`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org