spark git commit: [SPARK-15168][PYSPARK][ML] Add missing params to MultilayerPerceptronClassifier

mlnick Fri, 03 Jun 2016 15:56:45 -0700

Repository: spark
Updated Branches:
  refs/heads/master b1cc7da3e -> 67cc89ff0



[SPARK-15168][PYSPARK][ML] Add missing params to MultilayerPerceptronClassifier

## What changes were proposed in this pull request?

MultilayerPerceptronClassifier is missing step size, solver, and weights. Add 
these params. Also clarify the scaladoc a bit while we are updating these 
params.

Eventually we should follow up and unify the HasSolver params (filed 
https://issues.apache.org/jira/browse/SPARK-15169 )

## How was this patch tested?

Doc tests

Author: Holden Karau <hol...@us.ibm.com>

Closes #12943 from 
holdenk/SPARK-15168-add-missing-params-to-MultilayerPerceptronClassifier.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/67cc89ff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/67cc89ff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/67cc89ff

Branch: refs/heads/master
Commit: 67cc89ff028324ba4a7a7d9c19a268b9afea0031
Parents: b1cc7da
Author: Holden Karau <hol...@us.ibm.com>
Authored: Fri Jun 3 15:56:17 2016 -0700
Committer: Nick Pentreath <ni...@za.ibm.com>
Committed: Fri Jun 3 15:56:17 2016 -0700

----------------------------------------------------------------------
 python/pyspark/ml/classification.py | 75 ++++++++++++++++++++++++++++----
 1 file changed, 66 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/67cc89ff/python/pyspark/ml/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/classification.py 
b/python/pyspark/ml/classification.py
index 177cf9d..7710fdf 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1041,7 +1041,8 @@ class NaiveBayesModel(JavaModel, JavaMLWritable, 
JavaMLReadable):
 
 @inherit_doc
 class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredictionCol,
-                                     HasMaxIter, HasTol, HasSeed, 
JavaMLWritable, JavaMLReadable):
+                                     HasMaxIter, HasTol, HasSeed, HasStepSize, 
JavaMLWritable,
+                                     JavaMLReadable):
     """
     Classifier trainer based on the Multilayer Perceptron.
     Each layer has sigmoid activation function, output layer has softmax.
@@ -1054,12 +1055,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, 
HasFeaturesCol, HasLabelCol,
     ...     (1.0, Vectors.dense([0.0, 1.0])),
     ...     (1.0, Vectors.dense([1.0, 0.0])),
     ...     (0.0, Vectors.dense([1.0, 1.0]))], ["label", "features"])
-    >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[2, 5, 2], 
blockSize=1, seed=123)
+    >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[2, 2, 2], 
blockSize=1, seed=123)
     >>> model = mlp.fit(df)
     >>> model.layers
-    [2, 5, 2]
+    [2, 2, 2]
     >>> model.weights.size
-    27
+    12
     >>> testDF = spark.createDataFrame([
     ...     (Vectors.dense([1.0, 0.0]),),
     ...     (Vectors.dense([0.0, 0.0]),)], ["features"])
@@ -1083,6 +1084,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, 
HasFeaturesCol, HasLabelCol,
     True
     >>> model.weights == model2.weights
     True
+    >>> mlp2 = mlp2.setInitialWeights(list(range(0, 12)))
+    >>> model3 = mlp2.fit(df)
+    >>> model3.weights != model2.weights
+    True
+    >>> model3.layers == model.layers
+    True
 
     .. versionadded:: 1.6.0
     """
@@ -1096,28 +1103,36 @@ class MultilayerPerceptronClassifier(JavaEstimator, 
HasFeaturesCol, HasLabelCol,
                       "remaining data in a partition then it is adjusted to 
the size of this " +
                       "data. Recommended size is between 10 and 1000, default 
is 128.",
                       typeConverter=TypeConverters.toInt)
+    solver = Param(Params._dummy(), "solver", "The solver algorithm for 
optimization. Supported " +
+                   "options: l-bfgs, gd.", 
typeConverter=TypeConverters.toString)
+    initialWeights = Param(Params._dummy(), "initialWeights", "The initial 
weights of the model.",
+                           typeConverter=TypeConverters.toVector)
 
     @keyword_only
     def __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, 
stepSize=0.03,
+                 solver="l-bfgs", initialWeights=None):
         """
         __init__(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
-                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
+                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, 
stepSize=0.03, \
+                 solver="l-bfgs", initialWeights=None)
         """
         super(MultilayerPerceptronClassifier, self).__init__()
         self._java_obj = self._new_java_obj(
             
"org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
-        self._setDefault(maxIter=100, tol=1E-4, blockSize=128)
+        self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, 
solver="l-bfgs")
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
 
     @keyword_only
     @since("1.6.0")
     def setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction",
-                  maxIter=100, tol=1e-4, seed=None, layers=None, 
blockSize=128):
+                  maxIter=100, tol=1e-4, seed=None, layers=None, 
blockSize=128, stepSize=0.03,
+                  solver="l-bfgs", initialWeights=None):
         """
         setParams(self, featuresCol="features", labelCol="label", 
predictionCol="prediction", \
-                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
+                  maxIter=100, tol=1e-4, seed=None, layers=None, 
blockSize=128, stepSize=0.03, \
+                  solver="l-bfgs", initialWeights=None)
         Sets params for MultilayerPerceptronClassifier.
         """
         kwargs = self.setParams._input_kwargs
@@ -1154,6 +1169,48 @@ class MultilayerPerceptronClassifier(JavaEstimator, 
HasFeaturesCol, HasLabelCol,
         """
         return self.getOrDefault(self.blockSize)
 
+    @since("2.0.0")
+    def setStepSize(self, value):
+        """
+        Sets the value of :py:attr:`stepSize`.
+        """
+        return self._set(stepSize=value)
+
+    @since("2.0.0")
+    def getStepSize(self):
+        """
+        Gets the value of stepSize or its default value.
+        """
+        return self.getOrDefault(self.stepSize)
+
+    @since("2.0.0")
+    def setSolver(self, value):
+        """
+        Sets the value of :py:attr:`solver`.
+        """
+        return self._set(solver=value)
+
+    @since("2.0.0")
+    def getSolver(self):
+        """
+        Gets the value of solver or its default value.
+        """
+        return self.getOrDefault(self.solver)
+
+    @since("2.0.0")
+    def setInitialWeights(self, value):
+        """
+        Sets the value of :py:attr:`initialWeights`.
+        """
+        return self._set(initialWeights=value)
+
+    @since("2.0.0")
+    def getInitialWeights(self):
+        """
+        Gets the value of initialWeights or its default value.
+        """
+        return self.getOrDefault(self.initialWeights)
+
 
 class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, 
JavaMLReadable):
     """


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15168][PYSPARK][ML] Add missing params to MultilayerPerceptronClassifier

Reply via email to