Repository: spark Updated Branches: refs/heads/master fd8af3971 -> a95252823
[SPARK-15771][ML][EXAMPLES] Use 'accuracy' rather than 'precision' in many ML examples ## What changes were proposed in this pull request? Since [SPARK-15617](https://issues.apache.org/jira/browse/SPARK-15617) deprecated ```precision``` in ```MulticlassClassificationEvaluator```, many ML examples are broken. ```python pyspark.sql.utils.IllegalArgumentException: u'MulticlassClassificationEvaluator_4c3bb1d73d8cc0cedae6 parameter metricName given invalid value precision.' ``` We should use ```accuracy``` to replace ```precision``` in these examples. ## How was this patch tested? Offline tests. Author: Yanbo Liang <yblia...@gmail.com> Closes #13519 from yanboliang/spark-15771. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a9525282 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a9525282 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a9525282 Branch: refs/heads/master Commit: a95252823e09939b654dd425db38dadc4100bc87 Parents: fd8af39 Author: Yanbo Liang <yblia...@gmail.com> Authored: Mon Jun 6 09:36:34 2016 +0100 Committer: Sean Owen <so...@cloudera.com> Committed: Mon Jun 6 09:36:34 2016 +0100 ---------------------------------------------------------------------- .../examples/ml/JavaDecisionTreeClassificationExample.java | 2 +- .../examples/ml/JavaGradientBoostedTreeClassifierExample.java | 2 +- .../examples/ml/JavaMultilayerPerceptronClassifierExample.java | 6 +++--- .../org/apache/spark/examples/ml/JavaNaiveBayesExample.java | 6 +++--- .../org/apache/spark/examples/ml/JavaOneVsRestExample.java | 6 +++--- .../spark/examples/ml/JavaRandomForestClassifierExample.java | 2 +- .../src/main/python/ml/decision_tree_classification_example.py | 2 +- .../main/python/ml/gradient_boosted_tree_classifier_example.py | 2 +- .../src/main/python/ml/multilayer_perceptron_classification.py | 6 +++--- examples/src/main/python/ml/naive_bayes_example.py | 6 +++--- 
examples/src/main/python/ml/one_vs_rest_example.py | 6 +++--- .../src/main/python/ml/random_forest_classifier_example.py | 2 +- .../spark/examples/ml/DecisionTreeClassificationExample.scala | 2 +- .../examples/ml/GradientBoostedTreeClassifierExample.scala | 2 +- .../examples/ml/MultilayerPerceptronClassifierExample.scala | 6 +++--- .../scala/org/apache/spark/examples/ml/NaiveBayesExample.scala | 6 +++--- .../scala/org/apache/spark/examples/ml/OneVsRestExample.scala | 6 +++--- .../spark/examples/ml/RandomForestClassifierExample.scala | 2 +- python/pyspark/ml/evaluation.py | 2 +- 19 files changed, 37 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java index bdb76f0..a9c6e7f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java @@ -90,7 +90,7 @@ public class JavaDecisionTreeClassificationExample { MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") - .setMetricName("precision"); + .setMetricName("accuracy"); double accuracy = evaluator.evaluate(predictions); System.out.println("Test Error = " + (1.0 - accuracy)); http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java ---------------------------------------------------------------------- diff --git 
a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java index 5c2e03e..3e9eb99 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java @@ -92,7 +92,7 @@ public class JavaGradientBoostedTreeClassifierExample { MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") - .setMetricName("precision"); + .setMetricName("accuracy"); double accuracy = evaluator.evaluate(predictions); System.out.println("Test Error = " + (1.0 - accuracy)); http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java index c7d03d8..0f1d9c2 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java @@ -57,12 +57,12 @@ public class JavaMultilayerPerceptronClassifierExample { .setMaxIter(100); // train the model MultilayerPerceptronClassificationModel model = trainer.fit(train); - // compute precision on the test set + // compute accuracy on the test set Dataset<Row> result = model.transform(test); Dataset<Row> predictionAndLabels = result.select("prediction", "label"); MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() - 
.setMetricName("precision"); - System.out.println("Precision = " + evaluator.evaluate(predictionAndLabels)); + .setMetricName("accuracy"); + System.out.println("Accuracy = " + evaluator.evaluate(predictionAndLabels)); // $example off$ spark.stop(); http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java index 50a46a5..3226d5d 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java @@ -50,12 +50,12 @@ public class JavaNaiveBayesExample { NaiveBayes nb = new NaiveBayes(); // train the model NaiveBayesModel model = nb.fit(train); - // compute precision on the test set + // compute accuracy on the test set Dataset<Row> result = model.transform(test); Dataset<Row> predictionAndLabels = result.select("prediction", "label"); MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() - .setMetricName("precision"); - System.out.println("Precision = " + evaluator.evaluate(predictionAndLabels)); + .setMetricName("accuracy"); + System.out.println("Accuracy = " + evaluator.evaluate(predictionAndLabels)); // $example off$ spark.stop(); http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java index 5bf455e..c6a083d 100644 --- 
a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java @@ -71,11 +71,11 @@ public class JavaOneVsRestExample { // obtain evaluator. MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() - .setMetricName("precision"); + .setMetricName("accuracy"); // compute the classification error on test data. - double precision = evaluator.evaluate(predictions); - System.out.println("Test Error : " + (1 - precision)); + double accuracy = evaluator.evaluate(predictions); + System.out.println("Test Error : " + (1 - accuracy)); // $example off$ spark.stop(); http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java index 14af2fb..da2633e 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java @@ -88,7 +88,7 @@ public class JavaRandomForestClassifierExample { MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") - .setMetricName("precision"); + .setMetricName("accuracy"); double accuracy = evaluator.evaluate(predictions); System.out.println("Test Error = " + (1.0 - accuracy)); http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/python/ml/decision_tree_classification_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/decision_tree_classification_example.py 
b/examples/src/main/python/ml/decision_tree_classification_example.py index 9b40b70..708f1af 100644 --- a/examples/src/main/python/ml/decision_tree_classification_example.py +++ b/examples/src/main/python/ml/decision_tree_classification_example.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # Select (prediction, true label) and compute test error evaluator = MulticlassClassificationEvaluator( - labelCol="indexedLabel", predictionCol="prediction", metricName="precision") + labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy") accuracy = evaluator.evaluate(predictions) print("Test Error = %g " % (1.0 - accuracy)) http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py b/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py index 50026d7..6c2d7e7 100644 --- a/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py +++ b/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # Select (prediction, true label) and compute test error evaluator = MulticlassClassificationEvaluator( - labelCol="indexedLabel", predictionCol="prediction", metricName="precision") + labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy") accuracy = evaluator.evaluate(predictions) print("Test Error = %g" % (1.0 - accuracy)) http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/python/ml/multilayer_perceptron_classification.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/multilayer_perceptron_classification.py b/examples/src/main/python/ml/multilayer_perceptron_classification.py index 8bededc..aa33bef 100644 --- 
a/examples/src/main/python/ml/multilayer_perceptron_classification.py +++ b/examples/src/main/python/ml/multilayer_perceptron_classification.py @@ -43,11 +43,11 @@ if __name__ == "__main__": trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234) # train the model model = trainer.fit(train) - # compute precision on the test set + # compute accuracy on the test set result = model.transform(test) predictionAndLabels = result.select("prediction", "label") - evaluator = MulticlassClassificationEvaluator(metricName="precision") - print("Precision:" + str(evaluator.evaluate(predictionAndLabels))) + evaluator = MulticlassClassificationEvaluator(metricName="accuracy") + print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels))) # $example off$ spark.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/python/ml/naive_bayes_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/naive_bayes_example.py b/examples/src/main/python/ml/naive_bayes_example.py index 89255a2..8bc3222 100644 --- a/examples/src/main/python/ml/naive_bayes_example.py +++ b/examples/src/main/python/ml/naive_bayes_example.py @@ -43,11 +43,11 @@ if __name__ == "__main__": # train the model model = nb.fit(train) - # compute precision on the test set + # compute accuracy on the test set result = model.transform(test) predictionAndLabels = result.select("prediction", "label") - evaluator = MulticlassClassificationEvaluator(metricName="precision") - print("Precision:" + str(evaluator.evaluate(predictionAndLabels))) + evaluator = MulticlassClassificationEvaluator(metricName="accuracy") + print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels))) # $example off$ spark.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/python/ml/one_vs_rest_example.py ---------------------------------------------------------------------- diff 
--git a/examples/src/main/python/ml/one_vs_rest_example.py b/examples/src/main/python/ml/one_vs_rest_example.py index 971156d..b82087b 100644 --- a/examples/src/main/python/ml/one_vs_rest_example.py +++ b/examples/src/main/python/ml/one_vs_rest_example.py @@ -58,11 +58,11 @@ if __name__ == "__main__": predictions = ovrModel.transform(test) # obtain evaluator. - evaluator = MulticlassClassificationEvaluator(metricName="precision") + evaluator = MulticlassClassificationEvaluator(metricName="accuracy") # compute the classification error on test data. - precision = evaluator.evaluate(predictions) - print("Test Error : " + str(1 - precision)) + accuracy = evaluator.evaluate(predictions) + print("Test Error : " + str(1 - accuracy)) # $example off$ spark.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/python/ml/random_forest_classifier_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/random_forest_classifier_example.py b/examples/src/main/python/ml/random_forest_classifier_example.py index c618eaf..a7fc765 100644 --- a/examples/src/main/python/ml/random_forest_classifier_example.py +++ b/examples/src/main/python/ml/random_forest_classifier_example.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # Select (prediction, true label) and compute test error evaluator = MulticlassClassificationEvaluator( - labelCol="indexedLabel", predictionCol="prediction", metricName="precision") + labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy") accuracy = evaluator.evaluate(predictions) print("Test Error = %g" % (1.0 - accuracy)) http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala 
b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala index b3103ce..bc6d327 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala @@ -81,7 +81,7 @@ object DecisionTreeClassificationExample { val evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") - .setMetricName("precision") + .setMetricName("accuracy") val accuracy = evaluator.evaluate(predictions) println("Test Error = " + (1.0 - accuracy)) http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala index 0d1ffbe..9a39acf 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala @@ -83,7 +83,7 @@ object GradientBoostedTreeClassifierExample { val evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") - .setMetricName("precision") + .setMetricName("accuracy") val accuracy = evaluator.evaluate(predictions) println("Test Error = " + (1.0 - accuracy)) http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala 
b/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala index 0e780fb..e8a9b32 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala @@ -55,12 +55,12 @@ object MultilayerPerceptronClassifierExample { .setMaxIter(100) // train the model val model = trainer.fit(train) - // compute precision on the test set + // compute accuracy on the test set val result = model.transform(test) val predictionAndLabels = result.select("prediction", "label") val evaluator = new MulticlassClassificationEvaluator() - .setMetricName("precision") - println("Precision:" + evaluator.evaluate(predictionAndLabels)) + .setMetricName("accuracy") + println("Accuracy: " + evaluator.evaluate(predictionAndLabels)) // $example off$ spark.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala index 90cdebf..a59ba18 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala @@ -49,9 +49,9 @@ object NaiveBayesExample { val evaluator = new MulticlassClassificationEvaluator() .setLabelCol("label") .setPredictionCol("prediction") - .setMetricName("precision") - val precision = evaluator.evaluate(predictions) - println("Precision:" + precision) + .setMetricName("accuracy") + val accuracy = evaluator.evaluate(predictions) + println("Accuracy: " + accuracy) // $example off$ spark.stop() 
http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala index 0da8e31..acde110 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala @@ -65,11 +65,11 @@ object OneVsRestExample { // obtain evaluator. val evaluator = new MulticlassClassificationEvaluator() - .setMetricName("precision") + .setMetricName("accuracy") // compute the classification error on test data. - val precision = evaluator.evaluate(predictions) - println(s"Test Error : ${1 - precision}") + val accuracy = evaluator.evaluate(predictions) + println(s"Test Error : ${1 - accuracy}") // $example off$ spark.stop() http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala index cccc4a6..5eafda8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala @@ -83,7 +83,7 @@ object RandomForestClassifierExample { val evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") - .setMetricName("precision") + .setMetricName("accuracy") val accuracy = evaluator.evaluate(predictions) println("Test Error = " + (1.0 - accuracy)) 
http://git-wip-us.apache.org/repos/asf/spark/blob/a9525282/python/pyspark/ml/evaluation.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index c480525..cd071f1 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -265,7 +265,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio """ metricName = Param(Params._dummy(), "metricName", "metric name in evaluation " - "(f1|precision|recall|weightedPrecision|weightedRecall|accuracy)", + "(f1|weightedPrecision|weightedRecall|accuracy)", typeConverter=TypeConverters.toString) @keyword_only --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org