spark git commit: Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol"
Repository: spark Updated Branches: refs/heads/branch-2.1 8520d7c6d -> 258ca40cf Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol" This reverts commit 8520d7c6d5e880dea3c1a8a874148c07222b4b4b. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/258ca40c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/258ca40c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/258ca40c Branch: refs/heads/branch-2.1 Commit: 258ca40cf43eedae59b014a41fc6197df9bde299 Parents: 8520d7c Author: Yanbo Liang Authored: Fri Jul 28 20:24:54 2017 +0800 Committer: Yanbo Liang Committed: Fri Jul 28 20:24:54 2017 +0800 -- .../spark/ml/classification/OneVsRest.scala | 39 ++-- .../ml/classification/OneVsRestSuite.scala | 10 - python/pyspark/ml/classification.py | 27 +++--- python/pyspark/ml/tests.py | 14 --- 4 files changed, 9 insertions(+), 81 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/258ca40c/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index c4a8f1f..e58b30d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -34,7 +34,6 @@ import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params} -import org.apache.spark.ml.param.shared.HasWeightCol import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions._ @@ -54,8 +53,7 @@ private[ml] trait ClassifierTypeTrait { /** * Params for [[OneVsRest]]. 
*/ -private[ml] trait OneVsRestParams extends PredictorParams - with ClassifierTypeTrait with HasWeightCol { +private[ml] trait OneVsRestParams extends PredictorParams with ClassifierTypeTrait { /** * param for the base binary classifier that we reduce multiclass classification into. @@ -301,18 +299,6 @@ final class OneVsRest @Since("1.4.0") ( @Since("1.5.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) - /** - * Sets the value of param [[weightCol]]. - * - * This is ignored if weight is not supported by [[classifier]]. - * If this is not set or empty, we treat all instance weights as 1.0. - * Default is not set, so all instances have weight one. - * - * @group setParam - */ - @Since("2.3.0") - def setWeightCol(value: String): this.type = set(weightCol, value) - @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType) @@ -331,20 +317,7 @@ final class OneVsRest @Since("1.4.0") ( } val numClasses = MetadataUtils.getNumClasses(labelSchema).fold(computeNumClasses())(identity) -val weightColIsUsed = isDefined(weightCol) && $(weightCol).nonEmpty && { - getClassifier match { -case _: HasWeightCol => true -case c => - logWarning(s"weightCol is ignored, as it is not supported by $c now.") - false - } -} - -val multiclassLabeled = if (weightColIsUsed) { - dataset.select($(labelCol), $(featuresCol), $(weightCol)) -} else { - dataset.select($(labelCol), $(featuresCol)) -} +val multiclassLabeled = dataset.select($(labelCol), $(featuresCol)) // persist if underlying dataset is not persistent. 
val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE @@ -364,13 +337,7 @@ final class OneVsRest @Since("1.4.0") ( paramMap.put(classifier.labelCol -> labelColName) paramMap.put(classifier.featuresCol -> getFeaturesCol) paramMap.put(classifier.predictionCol -> getPredictionCol) - if (weightColIsUsed) { -val classifier_ = classifier.asInstanceOf[ClassifierType with HasWeightCol] -paramMap.put(classifier_.weightCol -> getWeightCol) -classifier_.fit(trainingDataset, paramMap) - } else { -classifier.fit(trainingDataset, paramMap) - } + classifier.fit(trainingDataset, paramMap) }.toArray[ClassificationModel[_, _]] if (handlePersistence) { http://git-wip-us.apache.org/repos/asf/spark/blob/258ca40c/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala -- diff --git
spark git commit: Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol"
Repository: spark Updated Branches: refs/heads/branch-2.0 ccb827224 -> f8ae2bdd2 Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol" This reverts commit ccb82722450c20c9cdea2b2c68783943213a5aa1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8ae2bdd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8ae2bdd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8ae2bdd Branch: refs/heads/branch-2.0 Commit: f8ae2bdd2112780ec2b1104119bac2b718a55413 Parents: ccb8272 Author: Yanbo Liang Authored: Fri Jul 28 19:45:14 2017 +0800 Committer: Yanbo Liang Committed: Fri Jul 28 19:45:14 2017 +0800 -- .../spark/ml/classification/OneVsRest.scala | 39 ++-- .../ml/classification/OneVsRestSuite.scala | 10 - python/pyspark/ml/classification.py | 27 +++--- python/pyspark/ml/tests.py | 14 --- 4 files changed, 9 insertions(+), 81 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f8ae2bdd/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 770d5db..f4ab0a0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -34,7 +34,6 @@ import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params} -import org.apache.spark.ml.param.shared.HasWeightCol import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions._ @@ -54,8 +53,7 @@ private[ml] trait ClassifierTypeTrait { /** * Params for [[OneVsRest]]. 
*/ -private[ml] trait OneVsRestParams extends PredictorParams - with ClassifierTypeTrait with HasWeightCol { +private[ml] trait OneVsRestParams extends PredictorParams with ClassifierTypeTrait { /** * param for the base binary classifier that we reduce multiclass classification into. @@ -292,18 +290,6 @@ final class OneVsRest @Since("1.4.0") ( @Since("1.5.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) - /** - * Sets the value of param [[weightCol]]. - * - * This is ignored if weight is not supported by [[classifier]]. - * If this is not set or empty, we treat all instance weights as 1.0. - * Default is not set, so all instances have weight one. - * - * @group setParam - */ - @Since("2.3.0") - def setWeightCol(value: String): this.type = set(weightCol, value) - @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType) @@ -322,20 +308,7 @@ final class OneVsRest @Since("1.4.0") ( } val numClasses = MetadataUtils.getNumClasses(labelSchema).fold(computeNumClasses())(identity) -val weightColIsUsed = isDefined(weightCol) && $(weightCol).nonEmpty && { - getClassifier match { -case _: HasWeightCol => true -case c => - logWarning(s"weightCol is ignored, as it is not supported by $c now.") - false - } -} - -val multiclassLabeled = if (weightColIsUsed) { - dataset.select($(labelCol), $(featuresCol), $(weightCol)) -} else { - dataset.select($(labelCol), $(featuresCol)) -} +val multiclassLabeled = dataset.select($(labelCol), $(featuresCol)) // persist if underlying dataset is not persistent. 
val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE @@ -355,13 +328,7 @@ final class OneVsRest @Since("1.4.0") ( paramMap.put(classifier.labelCol -> labelColName) paramMap.put(classifier.featuresCol -> getFeaturesCol) paramMap.put(classifier.predictionCol -> getPredictionCol) - if (weightColIsUsed) { -val classifier_ = classifier.asInstanceOf[ClassifierType with HasWeightCol] -paramMap.put(classifier_.weightCol -> getWeightCol) -classifier_.fit(trainingDataset, paramMap) - } else { -classifier.fit(trainingDataset, paramMap) - } + classifier.fit(trainingDataset, paramMap) }.toArray[ClassificationModel[_, _]] if (handlePersistence) { http://git-wip-us.apache.org/repos/asf/spark/blob/f8ae2bdd/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala -- diff --git