Repository: spark
Updated Branches:
  refs/heads/branch-2.0 7d6bd1196 -> 798825c09


[SPARK-14615][ML][FOLLOWUP] Fix Python examples to use the new ML Vector and 
Matrix APIs in the ML pipeline based algorithms

## What changes were proposed in this pull request?

This PR fixes Python examples to use the new ML Vector and Matrix APIs in the 
ML pipeline based algorithms.

I first ran the shell command `grep -r "from pyspark.mllib" .` to find the 
affected files, and then executed each of them.
Some of them, under `ml`, produced error messages like the one below:

```
pyspark.sql.utils.IllegalArgumentException: u'requirement failed: Input type 
must be VectorUDT but got org.apache.spark.mllib.linalg.VectorUDTf71b0bce.'
```

So, I fixed them to use the new ML APIs, in the same way that some Python 
tests were fixed in https://github.com/apache/spark/pull/12627

## How was this patch tested?

Manually tested for all the examples listed by `grep -r "from pyspark.mllib" .`.

Author: hyukjinkwon <gurwls...@gmail.com>

Closes #13393 from HyukjinKwon/SPARK-14615.

(cherry picked from commit 99f3c82776fe5ea4f89a9965a288c7447585dc2c)
Signed-off-by: Joseph K. Bradley <jos...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/798825c0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/798825c0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/798825c0

Branch: refs/heads/branch-2.0
Commit: 798825c09ba55dca449bde3f00ff2aeafd6b05b7
Parents: 7d6bd11
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Fri Jun 10 18:29:26 2016 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Fri Jun 10 18:29:37 2016 -0700

----------------------------------------------------------------------
 .../main/python/ml/aft_survival_regression.py    |  2 +-
 .../src/main/python/ml/chisq_selector_example.py |  2 +-
 examples/src/main/python/ml/dct_example.py       |  2 +-
 .../python/ml/elementwise_product_example.py     |  2 +-
 .../ml/estimator_transformer_param_example.py    |  2 +-
 examples/src/main/python/ml/pca_example.py       |  2 +-
 .../python/ml/polynomial_expansion_example.py    |  2 +-
 .../src/main/python/ml/simple_params_example.py  | 19 +++++++++----------
 .../main/python/ml/vector_assembler_example.py   |  2 +-
 .../src/main/python/ml/vector_slicer_example.py  |  2 +-
 10 files changed, 18 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/aft_survival_regression.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/aft_survival_regression.py 
b/examples/src/main/python/ml/aft_survival_regression.py
index 9879679..060f017 100644
--- a/examples/src/main/python/ml/aft_survival_regression.py
+++ b/examples/src/main/python/ml/aft_survival_regression.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.regression import AFTSurvivalRegression
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession
 

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/chisq_selector_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/chisq_selector_example.py 
b/examples/src/main/python/ml/chisq_selector_example.py
index 8bafb94..5e19ef1 100644
--- a/examples/src/main/python/ml/chisq_selector_example.py
+++ b/examples/src/main/python/ml/chisq_selector_example.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 from pyspark.sql import SparkSession
 # $example on$
 from pyspark.ml.feature import ChiSqSelector
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 # $example off$
 
 if __name__ == "__main__":

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/dct_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/dct_example.py 
b/examples/src/main/python/ml/dct_example.py
index e36fcde..a4f25df 100644
--- a/examples/src/main/python/ml/dct_example.py
+++ b/examples/src/main/python/ml/dct_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.feature import DCT
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession
 

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/elementwise_product_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/elementwise_product_example.py 
b/examples/src/main/python/ml/elementwise_product_example.py
index 41727ed..598deae 100644
--- a/examples/src/main/python/ml/elementwise_product_example.py
+++ b/examples/src/main/python/ml/elementwise_product_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.feature import ElementwiseProduct
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession
 

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/estimator_transformer_param_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py 
b/examples/src/main/python/ml/estimator_transformer_param_example.py
index 0fcae0e..3bd3fd3 100644
--- a/examples/src/main/python/ml/estimator_transformer_param_example.py
+++ b/examples/src/main/python/ml/estimator_transformer_param_example.py
@@ -20,7 +20,7 @@ Estimator Transformer Param Example.
 """
 
 # $example on$
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 from pyspark.ml.classification import LogisticRegression
 # $example off$
 from pyspark.sql import SparkSession

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/pca_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/pca_example.py 
b/examples/src/main/python/ml/pca_example.py
index f1b3cde..414629f 100644
--- a/examples/src/main/python/ml/pca_example.py
+++ b/examples/src/main/python/ml/pca_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.feature import PCA
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession
 

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/polynomial_expansion_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py 
b/examples/src/main/python/ml/polynomial_expansion_example.py
index 08882bc..9475e33 100644
--- a/examples/src/main/python/ml/polynomial_expansion_example.py
+++ b/examples/src/main/python/ml/polynomial_expansion_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.feature import PolynomialExpansion
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 # $example off$
 from pyspark.sql import SparkSession
 

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/simple_params_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/simple_params_example.py 
b/examples/src/main/python/ml/simple_params_example.py
index c57e59d..54fbc2c 100644
--- a/examples/src/main/python/ml/simple_params_example.py
+++ b/examples/src/main/python/ml/simple_params_example.py
@@ -21,9 +21,8 @@ import pprint
 import sys
 
 from pyspark.ml.classification import LogisticRegression
-from pyspark.mllib.linalg import DenseVector
-from pyspark.mllib.regression import LabeledPoint
-from pyspark.sql import SparkSession
+from pyspark.ml.linalg import DenseVector
+from pyspark.sql import Row, SparkSession
 
 """
 A simple example demonstrating ways to specify parameters for Estimators and 
Transformers.
@@ -42,10 +41,10 @@ if __name__ == "__main__":
     # A LabeledPoint is an Object with two fields named label and features
     # and Spark SQL identifies these fields and creates the schema 
appropriately.
     training = spark.createDataFrame([
-        LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])),
-        LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])),
-        LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])),
-        LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))])
+        Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])),
+        Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0])),
+        Row(label=0.0, features=DenseVector([2.0, 1.3, 1.0])),
+        Row(label=1.0, features=DenseVector([0.0, 1.2, -0.5]))])
 
     # Create a LogisticRegression instance with maxIter = 10.
     # This instance is an Estimator.
@@ -77,9 +76,9 @@ if __name__ == "__main__":
 
     # prepare test data.
     test = spark.createDataFrame([
-        LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])),
-        LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])),
-        LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))])
+        Row(label=1.0, features=DenseVector([-1.0, 1.5, 1.3])),
+        Row(label=0.0, features=DenseVector([3.0, 2.0, -0.1])),
+        Row(label=0.0, features=DenseVector([0.0, 2.2, -1.5]))])
 
     # Make predictions on test data using the Transformer.transform() method.
     # LogisticRegressionModel.transform will only use the 'features' column.

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/vector_assembler_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/vector_assembler_example.py 
b/examples/src/main/python/ml/vector_assembler_example.py
index b955ff0..bbfc316 100644
--- a/examples/src/main/python/ml/vector_assembler_example.py
+++ b/examples/src/main/python/ml/vector_assembler_example.py
@@ -18,7 +18,7 @@
 from __future__ import print_function
 
 # $example on$
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 from pyspark.ml.feature import VectorAssembler
 # $example off$
 from pyspark.sql import SparkSession

http://git-wip-us.apache.org/repos/asf/spark/blob/798825c0/examples/src/main/python/ml/vector_slicer_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/vector_slicer_example.py 
b/examples/src/main/python/ml/vector_slicer_example.py
index b833a89..d2f46b1 100644
--- a/examples/src/main/python/ml/vector_slicer_example.py
+++ b/examples/src/main/python/ml/vector_slicer_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 # $example on$
 from pyspark.ml.feature import VectorSlicer
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
 from pyspark.sql.types import Row
 # $example off$
 from pyspark.sql import SparkSession


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to