spark git commit: [SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes

jkbradley Wed, 13 Jul 2016 15:41:47 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 550d0e7dc -> abb802359



[SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes

## What changes were proposed in this pull request?

Fixing issues found during 2.0 API checks:
* GeneralizedLinearRegressionModel: linkObj, familyObj, familyAndLink should not be exposed
* sqlDataTypes: name does not follow conventions. Do we need to expose it?
* Evaluator: inconsistent doc between evaluate and isLargerBetter
* MinMaxScaler: math rendering --> hard to make it great, but I'll change it a little
* GeneralizedLinearRegressionSummary: aic doc is incorrect --> will change to use more common name

## How was this patch tested?

Existing unit tests.  Docs generated locally.  (MinMaxScaler is improved a tiny bit.)

Author: Joseph K. Bradley <jos...@databricks.com>

Closes #14187 from jkbradley/final-api-check-2.0.

(cherry picked from commit a5f51e21627c1bcfc62829a3a962707abf41a452)
Signed-off-by: Joseph K. Bradley <jos...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/abb80235
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/abb80235
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/abb80235

Branch: refs/heads/branch-2.0
Commit: abb8023599df4a9b5133accf547607eda5ca45d2
Parents: 550d0e7
Author: Joseph K. Bradley <jos...@databricks.com>
Authored: Wed Jul 13 15:40:44 2016 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Wed Jul 13 15:40:53 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/ml/evaluation/Evaluator.scala  |  7 ++--
 .../apache/spark/ml/feature/MinMaxScaler.scala  |  4 +--
 .../apache/spark/ml/linalg/SQLDataTypes.scala   | 36 ++++++++++++++++++++
 .../org/apache/spark/ml/linalg/dataTypes.scala  | 35 -------------------
 .../GeneralizedLinearRegression.scala           | 10 +++---
 .../spark/ml/linalg/JavaSQLDataTypesSuite.java  |  2 +-
 .../spark/ml/linalg/SQLDataTypesSuite.scala     |  4 +--
 7 files changed, 51 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
index 5f765c0..dfbc3e5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.Dataset
 abstract class Evaluator extends Params {
 
   /**
-   * Evaluates model output and returns a scalar metric (larger is better).
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
    *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @param paramMap parameter map that specifies the input columns and output metrics
@@ -42,7 +43,9 @@ abstract class Evaluator extends Params {
   }
 
   /**
-   * Evaluates the output.
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
+   *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @return metric
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 7b03f0c..9ed8d83 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -78,9 +78,9 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
  * statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
  * feature E is calculated as,
  *
- * Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min
+ * `Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min`
  *
- * For the case E_{max} == E_{min}, Rescaled(e_i) = 0.5 * (max + min)
+ * For the case `E_{max} == E_{min}`, `Rescaled(e_i) = 0.5 * (max + min)`.
  * Note that since zero values will probably be transformed to non-zero values, output of the
  * transformer will be DenseVector even for sparse input.
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/linalg/SQLDataTypes.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/SQLDataTypes.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/SQLDataTypes.scala
new file mode 100644
index 0000000..a66ba27
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/SQLDataTypes.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import org.apache.spark.annotation.{DeveloperApi, Since}
+import org.apache.spark.sql.types.DataType
+
+/**
+ * :: DeveloperApi ::
+ * SQL data types for vectors and matrices.
+ */
+@Since("2.0.0")
+@DeveloperApi
+object SQLDataTypes {
+
+  /** Data type for [[Vector]]. */
+  val VectorType: DataType = new VectorUDT
+
+  /** Data type for [[Matrix]]. */
+  val MatrixType: DataType = new MatrixUDT
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/linalg/dataTypes.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/dataTypes.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/dataTypes.scala
deleted file mode 100644
index 52a6fd2..0000000
--- a/mllib/src/main/scala/org/apache/spark/ml/linalg/dataTypes.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ml.linalg
-
-import org.apache.spark.annotation.DeveloperApi
-import org.apache.spark.sql.types.DataType
-
-/**
- * :: DeveloperApi ::
- * SQL data types for vectors and matrices.
- */
-@DeveloperApi
-object sqlDataTypes {
-
-  /** Data type for [[Vector]]. */
-  val VectorType: DataType = new VectorUDT
-
-  /** Data type for [[Matrix]]. */
-  val MatrixType: DataType = new MatrixUDT
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index a23e90d..2bdc09e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -376,7 +376,7 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
     def deviance(y: Double, mu: Double, weight: Double): Double
 
     /**
-     * Akaike's 'An Information Criterion'(AIC) value of the family for a given dataset.
+     * Akaike Information Criterion (AIC) value of the family for a given dataset.
      *
      * @param predictions an RDD of (y, mu, weight) of instances in evaluation dataset
      * @param deviance the deviance for the fitted model in evaluation dataset
@@ -702,13 +702,13 @@ class GeneralizedLinearRegressionModel private[ml] (
 
   import GeneralizedLinearRegression._
 
-  lazy val familyObj = Family.fromName($(family))
-  lazy val linkObj = if (isDefined(link)) {
+  private lazy val familyObj = Family.fromName($(family))
+  private lazy val linkObj = if (isDefined(link)) {
     Link.fromName($(link))
   } else {
     familyObj.defaultLink
   }
-  lazy val familyAndLink = new FamilyAndLink(familyObj, linkObj)
+  private lazy val familyAndLink = new FamilyAndLink(familyObj, linkObj)
 
   override protected def predict(features: Vector): Double = {
     val eta = predictLink(features)
@@ -1021,7 +1021,7 @@ class GeneralizedLinearRegressionSummary private[regression] (
     rss / degreesOfFreedom
   }
 
-  /** Akaike's "An Information Criterion"(AIC) for the fitted model. */
+  /** Akaike Information Criterion (AIC) for the fitted model. */
   @Since("2.0.0")
   lazy val aic: Double = {
     val w = weightCol

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/test/java/org/apache/spark/ml/linalg/JavaSQLDataTypesSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/ml/linalg/JavaSQLDataTypesSuite.java b/mllib/src/test/java/org/apache/spark/ml/linalg/JavaSQLDataTypesSuite.java
index b09e131..bd64a71 100644
--- a/mllib/src/test/java/org/apache/spark/ml/linalg/JavaSQLDataTypesSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/linalg/JavaSQLDataTypesSuite.java
@@ -20,7 +20,7 @@ package org.apache.spark.ml.linalg;
 import org.junit.Assert;
 import org.junit.Test;
 
-import static org.apache.spark.ml.linalg.sqlDataTypes.*;
+import static org.apache.spark.ml.linalg.SQLDataTypes.*;
 
 public class JavaSQLDataTypesSuite {
   @Test

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/test/scala/org/apache/spark/ml/linalg/SQLDataTypesSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/linalg/SQLDataTypesSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/linalg/SQLDataTypesSuite.scala
index 13bf3d3..0bd0c32 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/linalg/SQLDataTypesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/linalg/SQLDataTypesSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite
 
 class SQLDataTypesSuite extends SparkFunSuite {
   test("sqlDataTypes") {
-    assert(sqlDataTypes.VectorType === new VectorUDT)
-    assert(sqlDataTypes.MatrixType === new MatrixUDT)
+    assert(SQLDataTypes.VectorType === new VectorUDT)
+    assert(SQLDataTypes.MatrixType === new MatrixUDT)
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes

Reply via email to