Repository: spark Updated Branches: refs/heads/branch-2.0 81f080425 -> 09178b6ee
[SPARK-18133][BRANCH-2.0][EXAMPLES][ML] Python ML Pipeline Example has syntax errors ## What changes were proposed in this pull request? [Fix] [branch-2.0] In Python 3, there is only one integer type (i.e., int), which mostly behaves like the long type in Python 2. Since Python 3 won't accept the "L" suffix, it was removed in all examples. ## How was this patch tested? Unit tests. Author: Jagadeesan <a...@us.ibm.com> Closes #15729 from jagadeesanas2/SPARK-18133_branch2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/09178b6e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/09178b6e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/09178b6e Branch: refs/heads/branch-2.0 Commit: 09178b6eefd33011c3e90164356a5d6c3ae737bd Parents: 81f0804 Author: Jagadeesan <a...@us.ibm.com> Authored: Wed Nov 2 09:23:30 2016 +0000 Committer: Sean Owen <so...@cloudera.com> Committed: Wed Nov 2 09:23:30 2016 +0000 ---------------------------------------------------------------------- examples/src/main/python/ml/cross_validator.py | 8 ++++---- examples/src/main/python/ml/pipeline_example.py | 16 ++++++++-------- .../mllib/binary_classification_metrics_example.py | 2 +- .../python/mllib/multi_class_metrics_example.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/ml/cross_validator.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py index a41df6c..283db5d 100644 --- a/examples/src/main/python/ml/cross_validator.py +++ b/examples/src/main/python/ml/cross_validator.py @@ -83,10 +83,10 @@ if __name__ == "__main__": # Prepare test documents, which are unlabeled. 
test = spark.createDataFrame([ - (4L, "spark i j k"), - (5L, "l m n"), - (6L, "mapreduce spark"), - (7L, "apache hadoop") + (4, "spark i j k"), + (5, "l m n"), + (6, "mapreduce spark"), + (7, "apache hadoop") ], ["id", "text"]) # Make predictions on test documents. cvModel uses the best model found (lrModel). http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/ml/pipeline_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py index bd10cfd..1926cd2 100644 --- a/examples/src/main/python/ml/pipeline_example.py +++ b/examples/src/main/python/ml/pipeline_example.py @@ -35,10 +35,10 @@ if __name__ == "__main__": # $example on$ # Prepare training documents from a list of (id, text, label) tuples. training = spark.createDataFrame([ - (0L, "a b c d e spark", 1.0), - (1L, "b d", 0.0), - (2L, "spark f g h", 1.0), - (3L, "hadoop mapreduce", 0.0)], ["id", "text", "label"]) + (0, "a b c d e spark", 1.0), + (1, "b d", 0.0), + (2, "spark f g h", 1.0), + (3, "hadoop mapreduce", 0.0)], ["id", "text", "label"]) # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. tokenizer = Tokenizer(inputCol="text", outputCol="words") @@ -51,10 +51,10 @@ if __name__ == "__main__": # Prepare test documents, which are unlabeled (id, text) tuples. test = spark.createDataFrame([ - (4L, "spark i j k"), - (5L, "l m n"), - (6L, "mapreduce spark"), - (7L, "apache hadoop")], ["id", "text"]) + (4, "spark i j k"), + (5, "l m n"), + (6, "mapreduce spark"), + (7, "apache hadoop")], ["id", "text"]) # Make predictions on test documents and print columns of interest. 
prediction = model.transform(test) http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/mllib/binary_classification_metrics_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py index daf000e..91f8378 100644 --- a/examples/src/main/python/mllib/binary_classification_metrics_example.py +++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py @@ -39,7 +39,7 @@ if __name__ == "__main__": .rdd.map(lambda row: LabeledPoint(row[0], row[1])) # Split data into training (60%) and test (40%) - training, test = data.randomSplit([0.6, 0.4], seed=11L) + training, test = data.randomSplit([0.6, 0.4], seed=11) training.cache() # Run training algorithm to build the model http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/mllib/multi_class_metrics_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/mllib/multi_class_metrics_example.py b/examples/src/main/python/mllib/multi_class_metrics_example.py index cd56b3c..7dc5fb4 100644 --- a/examples/src/main/python/mllib/multi_class_metrics_example.py +++ b/examples/src/main/python/mllib/multi_class_metrics_example.py @@ -32,7 +32,7 @@ if __name__ == "__main__": data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt") # Split data into training (60%) and test (40%) - training, test = data.randomSplit([0.6, 0.4], seed=11L) + training, test = data.randomSplit([0.6, 0.4], seed=11) training.cache() # Run training algorithm to build the model --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org