Repository: spark Updated Branches: refs/heads/branch-2.0 e7a489c7f -> 838143a2a
[SPARK-15162][SPARK-15164][PYSPARK][DOCS][ML] update some pydocs ## What changes were proposed in this pull request? Mark ml.classification algorithms as experimental to match Scala algorithms, update PyDoc for thresholds on `LogisticRegression` to have same level of info as Scala, and enable mathjax for PyDoc. ## How was this patch tested? Built docs locally & PySpark SQL tests Author: Holden Karau <hol...@us.ibm.com> Closes #12938 from holdenk/SPARK-15162-SPARK-15164-update-some-pydocs. (cherry picked from commit d281b0bafe6aa23085d4d2b68f0ce321f1978b50) Signed-off-by: Nick Pentreath <ni...@za.ibm.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/838143a2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/838143a2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/838143a2 Branch: refs/heads/branch-2.0 Commit: 838143a2a02192a9ebc955b8060a6520b62d9644 Parents: e7a489c Author: Holden Karau <hol...@us.ibm.com> Authored: Wed Jun 22 11:54:49 2016 +0200 Committer: Nick Pentreath <ni...@za.ibm.com> Committed: Wed Jun 22 11:55:10 2016 +0200 ---------------------------------------------------------------------- .../ml/classification/LogisticRegression.scala | 5 ++- python/docs/conf.py | 1 + python/pyspark/ml/classification.py | 38 ++++++++++++++++++-- 3 files changed, 39 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/838143a2/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index a7ba39e..2fa8fbc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -72,10 +72,9 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas /** * Get threshold for binary classification. * - * If [[threshold]] is set, returns that value. - * Otherwise, if [[thresholds]] is set with length 2 (i.e., binary classification), + * If [[thresholds]] is set with length 2 (i.e., binary classification), * this returns the equivalent threshold: {{{1 / (1 + thresholds(0) / thresholds(1))}}}. - * Otherwise, returns [[threshold]] default value. + * Otherwise, returns [[threshold]] if set, or its default value if unset. * * @group getParam * @throws IllegalArgumentException if [[thresholds]] is set to an array of length other than 2. http://git-wip-us.apache.org/repos/asf/spark/blob/838143a2/python/docs/conf.py ---------------------------------------------------------------------- diff --git a/python/docs/conf.py b/python/docs/conf.py index d35bf73..50fb317 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -32,6 +32,7 @@ extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'epytext', + 'sphinx.ext.mathjax', ] # Add any paths that contain templates here, relative to this directory. http://git-wip-us.apache.org/repos/asf/spark/blob/838143a2/python/pyspark/ml/classification.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index e86c27e..d6d713c 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -49,6 +49,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Logistic regression. Currently, this class only supports binary classification. 
@@ -96,7 +98,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti threshold = Param(Params._dummy(), "threshold", "Threshold in binary classification prediction, in range [0, 1]." + - " If threshold and thresholds are both set, they must match.", + " If threshold and thresholds are both set, they must match." + + "e.g. if threshold is p, then thresholds must be equal to [1-p, p].", typeConverter=TypeConverters.toFloat) @keyword_only @@ -154,7 +157,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti @since("1.4.0") def getThreshold(self): """ - Gets the value of threshold or its default value. + Get threshold for binary classification. + + If :py:attr:`thresholds` is set with length 2 (i.e., binary classification), + this returns the equivalent threshold: + :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. + Otherwise, returns :py:attr:`threshold` if set or its default value if unset. """ self._checkThresholdConsistency() if self.isSet(self.thresholds): @@ -208,6 +216,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by LogisticRegression. .. versionadded:: 1.3.0 @@ -491,6 +501,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -587,6 +599,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred @inherit_doc class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ + .. 
note:: Experimental + Model fitted by DecisionTreeClassifier. .. versionadded:: 1.4.0 @@ -620,6 +634,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -714,6 +730,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by RandomForestClassifier. .. versionadded:: 1.4.0 @@ -746,6 +764,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_ learning algorithm for classification. It supports binary labels, as well as both continuous and categorical features. @@ -865,6 +885,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by GBTClassifier. .. versionadded:: 1.4.0 @@ -896,6 +918,8 @@ class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable) class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Naive Bayes Classifiers. It supports both Multinomial and Bernoulli NB. 
`Multinomial NB <http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html>`_ @@ -1019,6 +1043,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by NaiveBayes. .. versionadded:: 1.5.0 @@ -1046,6 +1072,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Classifier trainer based on the Multilayer Perceptron. Each layer has sigmoid activation function, output layer has softmax. Number of inputs has to be equal to the size of feature vectors. @@ -1216,6 +1244,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by MultilayerPerceptronClassifier. .. versionadded:: 1.6.0 @@ -1265,6 +1295,8 @@ class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasPredictionCol): @inherit_doc class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable): """ + .. note:: Experimental + Reduction of Multiclass Classification to Binary Classification. Performs reduction using one against all strategy. For a multiclass classification with k classes, train k models (one per class). @@ -1419,6 +1451,8 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable): class OneVsRestModel(Model, OneVsRestParams, MLReadable, MLWritable): """ + .. note:: Experimental + Model fitted by OneVsRest. This stores the models resulting from training k binary classifiers: one for each class. 
Each example is scored against all k models, and the model with the highest score --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org