spark git commit: [SPARK-9766] [ML] [PySpark] check and add miss docs for PySpark ML

jkbradley Wed, 12 Aug 2015 13:25:27 -0700

Repository: spark
Updated Branches:
  refs/heads/master 60103ecd3 -> 762bacc16



[SPARK-9766] [ML] [PySpark] check and add miss docs for PySpark ML

Check and add missing docs for PySpark ML (this issue only checks missing docs for 
o.a.s.ml, not o.a.s.mllib).

Author: Yanbo Liang <yblia...@gmail.com>

Closes #8059 from yanboliang/SPARK-9766.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/762bacc1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/762bacc1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/762bacc1

Branch: refs/heads/master
Commit: 762bacc16ac5e74c8b05a7c1e3e367d1d1633cef
Parents: 60103ec
Author: Yanbo Liang <yblia...@gmail.com>
Authored: Wed Aug 12 13:24:18 2015 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Wed Aug 12 13:24:18 2015 -0700

----------------------------------------------------------------------
 python/pyspark/ml/classification.py | 12 ++++++++++--
 python/pyspark/ml/clustering.py     |  4 +++-
 python/pyspark/ml/evaluation.py     |  3 ++-
 python/pyspark/ml/feature.py        |  9 +++++----
 4 files changed, 20 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/762bacc1/python/pyspark/ml/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/classification.py 
b/python/pyspark/ml/classification.py
index 5978d8f..6702dce 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -34,6 +34,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredicti
                          HasRegParam, HasTol, HasProbabilityCol, 
HasRawPredictionCol):
     """
     Logistic regression.
+    Currently, this class only supports binary classification.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors
@@ -96,8 +97,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredicti
         #  is an L2 penalty. For alpha = 1, it is an L1 penalty.
         self.elasticNetParam = \
             Param(self, "elasticNetParam",
-                  "the ElasticNet mixing parameter, in range [0, 1]. For alpha 
= 0, the penalty " +
-                  "is an L2 penalty. For alpha = 1, it is an L1 penalty.")
+                  "the ElasticNet mixing parameter, in range [0, 1]. For alpha 
= 0, " +
+                  "the penalty is an L2 penalty. For alpha = 1, it is an L1 
penalty.")
         #: param for whether to fit an intercept term.
         self.fitIntercept = Param(self, "fitIntercept", "whether to fit an 
intercept term.")
         #: param for threshold in binary classification prediction, in range 
[0, 1].
@@ -656,6 +657,13 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, 
HasLabelCol, HasPredictionCol, H
                  HasRawPredictionCol):
     """
     Naive Bayes Classifiers.
+    It supports both Multinomial and Bernoulli NB. Multinomial NB
+    
(`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`)
+    can handle finitely supported discrete data. For example, by converting 
documents into
+    TF-IDF vectors, it can be used for document classification. By making 
every vector a
+    binary (0/1) data, it can also be used as Bernoulli NB
+    
(`http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html`).
+    The input feature values must be nonnegative.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors

http://git-wip-us.apache.org/repos/asf/spark/blob/762bacc1/python/pyspark/ml/clustering.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index b5e9b65..4833871 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -37,7 +37,9 @@ class KMeansModel(JavaModel):
 @inherit_doc
 class KMeans(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed):
     """
-    K-means Clustering
+    K-means clustering with support for multiple parallel runs and a k-means++ 
like initialization
+    mode (the k-means|| algorithm by Bahmani et al). When multiple concurrent 
runs are requested,
+    they are executed together with joint passes over the data for efficiency.
 
     >>> from pyspark.mllib.linalg import Vectors
     >>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),

http://git-wip-us.apache.org/repos/asf/spark/blob/762bacc1/python/pyspark/ml/evaluation.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py
index 06e8093..2734092 100644
--- a/python/pyspark/ml/evaluation.py
+++ b/python/pyspark/ml/evaluation.py
@@ -23,7 +23,8 @@ from pyspark.ml.param.shared import HasLabelCol, 
HasPredictionCol, HasRawPredict
 from pyspark.ml.util import keyword_only
 from pyspark.mllib.common import inherit_doc
 
-__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator']
+__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
+           'MulticlassClassificationEvaluator']
 
 
 @inherit_doc

http://git-wip-us.apache.org/repos/asf/spark/blob/762bacc1/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index cb4dfa2..535d553 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -26,10 +26,11 @@ from pyspark.ml.wrapper import JavaEstimator, JavaModel, 
JavaTransformer
 from pyspark.mllib.common import inherit_doc
 from pyspark.mllib.linalg import _convert_to_vector
 
-__all__ = ['Binarizer', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 'Normalizer', 
'OneHotEncoder',
-           'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 
'StandardScalerModel',
-           'StringIndexer', 'StringIndexerModel', 'Tokenizer', 
'VectorAssembler', 'VectorIndexer',
-           'Word2Vec', 'Word2VecModel', 'PCA', 'PCAModel', 'RFormula', 
'RFormulaModel']
+__all__ = ['Binarizer', 'Bucketizer', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 
'Normalizer',
+           'OneHotEncoder', 'PolynomialExpansion', 'RegexTokenizer', 
'StandardScaler',
+           'StandardScalerModel', 'StringIndexer', 'StringIndexerModel', 
'Tokenizer',
+           'VectorAssembler', 'VectorIndexer', 'Word2Vec', 'Word2VecModel', 
'PCA',
+           'PCAModel', 'RFormula', 'RFormulaModel']
 
 
 @inherit_doc


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-9766] [ML] [PySpark] check and add miss docs for PySpark ML

Reply via email to