spark git commit: Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol"

2017-07-28 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 8520d7c6d -> 258ca40cf


Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol"

This reverts commit 8520d7c6d5e880dea3c1a8a874148c07222b4b4b.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/258ca40c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/258ca40c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/258ca40c

Branch: refs/heads/branch-2.1
Commit: 258ca40cf43eedae59b014a41fc6197df9bde299
Parents: 8520d7c
Author: Yanbo Liang 
Authored: Fri Jul 28 20:24:54 2017 +0800
Committer: Yanbo Liang 
Committed: Fri Jul 28 20:24:54 2017 +0800

--
 .../spark/ml/classification/OneVsRest.scala | 39 ++--
 .../ml/classification/OneVsRestSuite.scala  | 10 -
 python/pyspark/ml/classification.py | 27 +++---
 python/pyspark/ml/tests.py  | 14 ---
 4 files changed, 9 insertions(+), 81 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/258ca40c/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index c4a8f1f..e58b30d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -34,7 +34,6 @@ import org.apache.spark.ml._
 import org.apache.spark.ml.attribute._
 import org.apache.spark.ml.linalg.Vector
 import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params}
-import org.apache.spark.ml.param.shared.HasWeightCol
 import org.apache.spark.ml.util._
 import org.apache.spark.sql.{DataFrame, Dataset, Row}
 import org.apache.spark.sql.functions._
@@ -54,8 +53,7 @@ private[ml] trait ClassifierTypeTrait {
 /**
  * Params for [[OneVsRest]].
  */
-private[ml] trait OneVsRestParams extends PredictorParams
-  with ClassifierTypeTrait with HasWeightCol {
+private[ml] trait OneVsRestParams extends PredictorParams with ClassifierTypeTrait {
 
   /**
* param for the base binary classifier that we reduce multiclass classification into.
@@ -301,18 +299,6 @@ final class OneVsRest @Since("1.4.0") (
   @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)
 
-  /**
-   * Sets the value of param [[weightCol]].
-   *
-   * This is ignored if weight is not supported by [[classifier]].
-   * If this is not set or empty, we treat all instance weights as 1.0.
-   * Default is not set, so all instances have weight one.
-   *
-   * @group setParam
-   */
-  @Since("2.3.0")
-  def setWeightCol(value: String): this.type = set(weightCol, value)
-
   @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
 validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType)
@@ -331,20 +317,7 @@ final class OneVsRest @Since("1.4.0") (
 }
 val numClasses = MetadataUtils.getNumClasses(labelSchema).fold(computeNumClasses())(identity)
 
-val weightColIsUsed = isDefined(weightCol) && $(weightCol).nonEmpty && {
-  getClassifier match {
-case _: HasWeightCol => true
-case c =>
-  logWarning(s"weightCol is ignored, as it is not supported by $c now.")
-  false
-  }
-}
-
-val multiclassLabeled = if (weightColIsUsed) {
-  dataset.select($(labelCol), $(featuresCol), $(weightCol))
-} else {
-  dataset.select($(labelCol), $(featuresCol))
-}
+val multiclassLabeled = dataset.select($(labelCol), $(featuresCol))
 
 // persist if underlying dataset is not persistent.
 val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
@@ -364,13 +337,7 @@ final class OneVsRest @Since("1.4.0") (
   paramMap.put(classifier.labelCol -> labelColName)
   paramMap.put(classifier.featuresCol -> getFeaturesCol)
   paramMap.put(classifier.predictionCol -> getPredictionCol)
-  if (weightColIsUsed) {
-val classifier_ = classifier.asInstanceOf[ClassifierType with HasWeightCol]
-paramMap.put(classifier_.weightCol -> getWeightCol)
-classifier_.fit(trainingDataset, paramMap)
-  } else {
-classifier.fit(trainingDataset, paramMap)
-  }
+  classifier.fit(trainingDataset, paramMap)
 }.toArray[ClassificationModel[_, _]]
 
 if (handlePersistence) {

http://git-wip-us.apache.org/repos/asf/spark/blob/258ca40c/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
--
diff --git 

spark git commit: Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol"

2017-07-28 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 ccb827224 -> f8ae2bdd2


Revert "[SPARK-21306][ML] OneVsRest should support setWeightCol"

This reverts commit ccb82722450c20c9cdea2b2c68783943213a5aa1.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8ae2bdd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8ae2bdd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8ae2bdd

Branch: refs/heads/branch-2.0
Commit: f8ae2bdd2112780ec2b1104119bac2b718a55413
Parents: ccb8272
Author: Yanbo Liang 
Authored: Fri Jul 28 19:45:14 2017 +0800
Committer: Yanbo Liang 
Committed: Fri Jul 28 19:45:14 2017 +0800

--
 .../spark/ml/classification/OneVsRest.scala | 39 ++--
 .../ml/classification/OneVsRestSuite.scala  | 10 -
 python/pyspark/ml/classification.py | 27 +++---
 python/pyspark/ml/tests.py  | 14 ---
 4 files changed, 9 insertions(+), 81 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f8ae2bdd/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 770d5db..f4ab0a0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -34,7 +34,6 @@ import org.apache.spark.ml._
 import org.apache.spark.ml.attribute._
 import org.apache.spark.ml.linalg.Vector
 import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params}
-import org.apache.spark.ml.param.shared.HasWeightCol
 import org.apache.spark.ml.util._
 import org.apache.spark.sql.{DataFrame, Dataset, Row}
 import org.apache.spark.sql.functions._
@@ -54,8 +53,7 @@ private[ml] trait ClassifierTypeTrait {
 /**
  * Params for [[OneVsRest]].
  */
-private[ml] trait OneVsRestParams extends PredictorParams
-  with ClassifierTypeTrait with HasWeightCol {
+private[ml] trait OneVsRestParams extends PredictorParams with ClassifierTypeTrait {
 
   /**
* param for the base binary classifier that we reduce multiclass classification into.
@@ -292,18 +290,6 @@ final class OneVsRest @Since("1.4.0") (
   @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)
 
-  /**
-   * Sets the value of param [[weightCol]].
-   *
-   * This is ignored if weight is not supported by [[classifier]].
-   * If this is not set or empty, we treat all instance weights as 1.0.
-   * Default is not set, so all instances have weight one.
-   *
-   * @group setParam
-   */
-  @Since("2.3.0")
-  def setWeightCol(value: String): this.type = set(weightCol, value)
-
   @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
 validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType)
@@ -322,20 +308,7 @@ final class OneVsRest @Since("1.4.0") (
 }
 val numClasses = MetadataUtils.getNumClasses(labelSchema).fold(computeNumClasses())(identity)
 
-val weightColIsUsed = isDefined(weightCol) && $(weightCol).nonEmpty && {
-  getClassifier match {
-case _: HasWeightCol => true
-case c =>
-  logWarning(s"weightCol is ignored, as it is not supported by $c now.")
-  false
-  }
-}
-
-val multiclassLabeled = if (weightColIsUsed) {
-  dataset.select($(labelCol), $(featuresCol), $(weightCol))
-} else {
-  dataset.select($(labelCol), $(featuresCol))
-}
+val multiclassLabeled = dataset.select($(labelCol), $(featuresCol))
 
 // persist if underlying dataset is not persistent.
 val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
@@ -355,13 +328,7 @@ final class OneVsRest @Since("1.4.0") (
   paramMap.put(classifier.labelCol -> labelColName)
   paramMap.put(classifier.featuresCol -> getFeaturesCol)
   paramMap.put(classifier.predictionCol -> getPredictionCol)
-  if (weightColIsUsed) {
-val classifier_ = classifier.asInstanceOf[ClassifierType with HasWeightCol]
-paramMap.put(classifier_.weightCol -> getWeightCol)
-classifier_.fit(trainingDataset, paramMap)
-  } else {
-classifier.fit(trainingDataset, paramMap)
-  }
+  classifier.fit(trainingDataset, paramMap)
 }.toArray[ClassificationModel[_, _]]
 
 if (handlePersistence) {

http://git-wip-us.apache.org/repos/asf/spark/blob/f8ae2bdd/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
--
diff --git