spark git commit: [SPARK-11852][ML] StandardScaler minor refactor

2015-11-20 Thread meng
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 eab90d3f3 -> b11aa1797


[SPARK-11852][ML] StandardScaler minor refactor

```withStd``` and ```withMean``` should be params of ```StandardScaler``` and 
```StandardScalerModel```.

Author: Yanbo Liang 

Closes #9839 from yanboliang/standardScaler-refactor.

(cherry picked from commit 9ace2e5c8d7fbd360a93bc5fc4eace64a697b44f)
Signed-off-by: Xiangrui Meng 
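
For context, a minimal sketch of the estimator-side API this refactor touches, assuming a DataFrame ```df``` with a Vector-typed ```features``` column (the data and column names here are hypothetical, not part of the commit):

```scala
import org.apache.spark.ml.feature.StandardScaler

// Hypothetical input: a DataFrame `df` with a Vector column "features".
val scaler = new StandardScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeatures")
  .setWithMean(false) // default; true builds a dense output and fails on sparse input
  .setWithStd(true)   // default
val scalerModel = scaler.fit(df)         // computes per-feature mean and std
val scaledDf = scalerModel.transform(df) // appends the "scaledFeatures" column
```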


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b11aa179
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b11aa179
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b11aa179

Branch: refs/heads/branch-1.6
Commit: b11aa1797c928f2cfaf1d8821eff4be4109ac41d
Parents: eab90d3
Author: Yanbo Liang 
Authored: Fri Nov 20 09:55:53 2015 -0800
Committer: Xiangrui Meng 
Committed: Fri Nov 20 09:56:02 2015 -0800

--
 .../spark/ml/feature/StandardScaler.scala   | 60 +---
 .../spark/ml/feature/StandardScalerSuite.scala  | 11 ++--
 2 files changed, 32 insertions(+), 39 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b11aa179/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 6d54521..d76a9c6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -36,20 +36,30 @@ import org.apache.spark.sql.types.{StructField, StructType}
 private[feature] trait StandardScalerParams extends Params with HasInputCol with HasOutputCol {
 
   /**
-   * Centers the data with mean before scaling.
+   * Whether to center the data with mean before scaling.
    * It will build a dense output, so this does not work on sparse input
    * and will raise an exception.
    * Default: false
    * @group param
    */
-  val withMean: BooleanParam = new BooleanParam(this, "withMean", "Center data with mean")
+  val withMean: BooleanParam = new BooleanParam(this, "withMean",
+    "Whether to center data with mean")
+
+  /** @group getParam */
+  def getWithMean: Boolean = $(withMean)
 
   /**
-   * Scales the data to unit standard deviation.
+   * Whether to scale the data to unit standard deviation.
    * Default: true
    * @group param
    */
-  val withStd: BooleanParam = new BooleanParam(this, "withStd", "Scale to unit standard deviation")
+  val withStd: BooleanParam = new BooleanParam(this, "withStd",
+    "Whether to scale the data to unit standard deviation")
+
+  /** @group getParam */
+  def getWithStd: Boolean = $(withStd)
+
+  setDefault(withMean -> false, withStd -> true)
 }
 
 /**
@@ -63,8 +73,6 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
 
   def this() = this(Identifiable.randomUID("stdScal"))
 
-  setDefault(withMean -> false, withStd -> true)
-
   /** @group setParam */
   def setInputCol(value: String): this.type = set(inputCol, value)
 
@@ -82,7 +90,7 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
     val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v }
     val scaler = new feature.StandardScaler(withMean = $(withMean), withStd = $(withStd))
     val scalerModel = scaler.fit(input)
-    copyValues(new StandardScalerModel(uid, scalerModel).setParent(this))
+    copyValues(new StandardScalerModel(uid, scalerModel.std, scalerModel.mean).setParent(this))
   }
 
   override def transformSchema(schema: StructType): StructType = {
@@ -108,29 +116,19 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] {
 /**
  * :: Experimental ::
  * Model fitted by [[StandardScaler]].
+ *
+ * @param std Standard deviation of the StandardScalerModel
+ * @param mean Mean of the StandardScalerModel
  */
 @Experimental
 class StandardScalerModel private[ml] (
     override val uid: String,
-    scaler: feature.StandardScalerModel)
+    val std: Vector,
+    val mean: Vector)
   extends Model[StandardScalerModel] with StandardScalerParams with MLWritable {
 
   import StandardScalerModel._
 
-  /** Standard deviation of the StandardScalerModel */
-  val std: Vector = scaler.std
-
-  /** Mean of the StandardScalerModel */
-  val mean: Vector = scaler.mean
-
-  /** Whether to scale to unit standard deviation. */
-  @Since("1.6.0")
-  def getWithStd: Boolean = scaler.withStd
-
-  /** Whether to center data with mean. */
-  @Since("1.6.0")
-  def getWithMean: Boolean = scaler.withMean
-
   /** @group setParam */
   def setInputCol(value: String): this.type = set(inputCol, value)

spark git commit: [SPARK-11852][ML] StandardScaler minor refactor

2015-11-20 Thread meng
Repository: spark
Updated Branches:
  refs/heads/master a66142dec -> 9ace2e5c8


[SPARK-11852][ML] StandardScaler minor refactor

```withStd``` and ```withMean``` should be params of ```StandardScaler``` and 
```StandardScalerModel```.

Author: Yanbo Liang 

Closes #9839 from yanboliang/standardScaler-refactor.
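
With this refactor, ```std``` and ```mean``` become public constructor vals on ```StandardScalerModel```, and ```getWithStd```/```getWithMean``` are plain param getters inherited from ```StandardScalerParams```. A brief sketch of reading them back, reusing the hypothetical ```scalerModel``` fitted in the example above:

```scala
// Inspect the fitted model (hypothetical `scalerModel` from the earlier sketch).
println(scalerModel.std)         // per-feature standard deviations (Vector)
println(scalerModel.mean)        // per-feature means (Vector)
println(scalerModel.getWithStd)  // true:  now read via $(withStd)
println(scalerModel.getWithMean) // false: now read via $(withMean)
```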


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ace2e5c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ace2e5c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ace2e5c

Branch: refs/heads/master
Commit: 9ace2e5c8d7fbd360a93bc5fc4eace64a697b44f
Parents: a66142d
Author: Yanbo Liang 
Authored: Fri Nov 20 09:55:53 2015 -0800
Committer: Xiangrui Meng 
Committed: Fri Nov 20 09:55:53 2015 -0800

--
 .../spark/ml/feature/StandardScaler.scala   | 60 +---
 .../spark/ml/feature/StandardScalerSuite.scala  | 11 ++--
 2 files changed, 32 insertions(+), 39 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9ace2e5c/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 6d54521..d76a9c6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -36,20 +36,30 @@ import org.apache.spark.sql.types.{StructField, StructType}
 private[feature] trait StandardScalerParams extends Params with HasInputCol with HasOutputCol {
 
   /**
-   * Centers the data with mean before scaling.
+   * Whether to center the data with mean before scaling.
    * It will build a dense output, so this does not work on sparse input
    * and will raise an exception.
    * Default: false
    * @group param
    */
-  val withMean: BooleanParam = new BooleanParam(this, "withMean", "Center data with mean")
+  val withMean: BooleanParam = new BooleanParam(this, "withMean",
+    "Whether to center data with mean")
+
+  /** @group getParam */
+  def getWithMean: Boolean = $(withMean)
 
   /**
-   * Scales the data to unit standard deviation.
+   * Whether to scale the data to unit standard deviation.
    * Default: true
    * @group param
    */
-  val withStd: BooleanParam = new BooleanParam(this, "withStd", "Scale to unit standard deviation")
+  val withStd: BooleanParam = new BooleanParam(this, "withStd",
+    "Whether to scale the data to unit standard deviation")
+
+  /** @group getParam */
+  def getWithStd: Boolean = $(withStd)
+
+  setDefault(withMean -> false, withStd -> true)
 }
 
 /**
@@ -63,8 +73,6 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
 
   def this() = this(Identifiable.randomUID("stdScal"))
 
-  setDefault(withMean -> false, withStd -> true)
-
   /** @group setParam */
   def setInputCol(value: String): this.type = set(inputCol, value)
 
@@ -82,7 +90,7 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
     val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v }
     val scaler = new feature.StandardScaler(withMean = $(withMean), withStd = $(withStd))
     val scalerModel = scaler.fit(input)
-    copyValues(new StandardScalerModel(uid, scalerModel).setParent(this))
+    copyValues(new StandardScalerModel(uid, scalerModel.std, scalerModel.mean).setParent(this))
   }
 
   override def transformSchema(schema: StructType): StructType = {
@@ -108,29 +116,19 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] {
 /**
  * :: Experimental ::
  * Model fitted by [[StandardScaler]].
+ *
+ * @param std Standard deviation of the StandardScalerModel
+ * @param mean Mean of the StandardScalerModel
  */
 @Experimental
 class StandardScalerModel private[ml] (
     override val uid: String,
-    scaler: feature.StandardScalerModel)
+    val std: Vector,
+    val mean: Vector)
   extends Model[StandardScalerModel] with StandardScalerParams with MLWritable {
 
   import StandardScalerModel._
 
-  /** Standard deviation of the StandardScalerModel */
-  val std: Vector = scaler.std
-
-  /** Mean of the StandardScalerModel */
-  val mean: Vector = scaler.mean
-
-  /** Whether to scale to unit standard deviation. */
-  @Since("1.6.0")
-  def getWithStd: Boolean = scaler.withStd
-
-  /** Whether to center data with mean. */
-  @Since("1.6.0")
-  def getWithMean: Boolean = scaler.withMean
-
   /** @group setParam */
   def setInputCol(value: String): this.type = set(inputCol, value)
 
@@ -139,6 +137,7 @@ class StandardScalerModel private[ml] (
 
   override def