Repository: spark
Updated Branches:
  refs/heads/master 045fc3606 -> 7e8279fde


[SPARK-15254][DOC] Improve ML pipeline Cross Validation Scaladoc & PyDoc

## What changes were proposed in this pull request?
Updated ML pipeline Cross Validation Scaladoc & PyDoc.

## How was this patch tested?

Documentation update

(If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)

Author: krishnakalyan3 <krishnakaly...@gmail.com>

Closes #13894 from krishnakalyan3/kfold-cv.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7e8279fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7e8279fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7e8279fd

Branch: refs/heads/master
Commit: 7e8279fde176b08687adf2b410693b35cfbd4b46
Parents: 045fc36
Author: krishnakalyan3 <krishnakaly...@gmail.com>
Authored: Wed Jul 27 15:37:38 2016 +0200
Committer: Nick Pentreath <ni...@za.ibm.com>
Committed: Wed Jul 27 15:37:38 2016 +0200

----------------------------------------------------------------------
 .../org/apache/spark/ml/tuning/CrossValidator.scala    | 10 ++++++++--
 python/pyspark/ml/tuning.py                            | 13 +++++++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7e8279fd/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index 5205578..6ea52ef 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -55,7 +55,11 @@ private[ml] trait CrossValidatorParams extends ValidatorParams {
 }
 
 /**
- * K-fold cross validation.
+ * K-fold cross validation performs model selection by splitting the dataset into a set of
+ * non-overlapping randomly partitioned folds which are used as separate training and test datasets
+ * e.g., with k=3 folds, K-fold cross validation will generate 3 (training, test) dataset pairs,
+ * each of which uses 2/3 of the data for training and 1/3 for testing. Each fold is used as the
+ * test set exactly once.
  */
 @Since("1.2.0")
 class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String)
@@ -188,7 +192,9 @@ object CrossValidator extends MLReadable[CrossValidator] {
 }
 
 /**
- * Model from k-fold cross validation.
+ * CrossValidatorModel contains the model with the highest average cross-validation
+ * metric across folds and uses this model to transform input data. CrossValidatorModel
+ * also tracks the metrics for each param map evaluated.
  *
  * @param bestModel The best model selected from k-fold cross validation.
  * @param avgMetrics Average cross-validation metrics for each paramMap in

http://git-wip-us.apache.org/repos/asf/spark/blob/7e8279fd/python/pyspark/ml/tuning.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 298314d..7f967e5 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -143,7 +143,13 @@ class ValidatorParams(HasSeed):
 
 class CrossValidator(Estimator, ValidatorParams):
     """
-    K-fold cross validation.
+
+    K-fold cross validation performs model selection by splitting the dataset into a set of
+    non-overlapping randomly partitioned folds which are used as separate training and test datasets
+    e.g., with k=3 folds, K-fold cross validation will generate 3 (training, test) dataset pairs,
+    each of which uses 2/3 of the data for training and 1/3 for testing. Each fold is used as the
+    test set exactly once.
+
 
     >>> from pyspark.ml.classification import LogisticRegression
     >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator
@@ -260,7 +266,10 @@ class CrossValidator(Estimator, ValidatorParams):
 
 class CrossValidatorModel(Model, ValidatorParams):
     """
-    Model from k-fold cross validation.
+
+    CrossValidatorModel contains the model with the highest average cross-validation
+    metric across folds and uses this model to transform input data. CrossValidatorModel
+    also tracks the metrics for each param map evaluated.
 
     .. versionadded:: 1.4.0
     """


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to