This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new d71be73 [SPARK-32428][EXAMPLES] Make BinaryClassificationMetricsExample cons… d71be73 is described below commit d71be73355033931a1b999f0ca4a05b95303187d Author: Itsuki Toyota <tits...@cpan.org> AuthorDate: Sun Jul 26 09:12:43 2020 -0500 [SPARK-32428][EXAMPLES] Make BinaryClassificationMetricsExample cons… …istently print the metrics on driver's stdout ### What changes were proposed in this pull request? Call collect on RDD before calling foreach so that it sends the result to the driver node and prints it on this node's stdout. ### Why are the changes needed? Some RDDs in this example (e.g., precision, recall) call println without calling collect. If the job is under local mode, it sends the data to the driver node and prints the metrics on the driver's stdout. However, if the job is under cluster mode, the job prints the metrics on the executor's stdout. It seems inconsistent compared to the other metrics that have nothing to do with RDDs (e.g., auPRC, auROC) since these metrics always output the result on the driver's stdout. All of the metrics should output their results on the driver's stdout. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This is example code. It doesn't have any tests. Closes #29222 from titsuki/SPARK-32428. 
Authored-by: Itsuki Toyota <tits...@cpan.org> Signed-off-by: Sean Owen <sro...@gmail.com> (cherry picked from commit 86ead044e3789b3291a38ec2142cbb343d1290c1) Signed-off-by: Sean Owen <sro...@gmail.com> --- .../spark/examples/mllib/BinaryClassificationMetricsExample.scala | 8 ++++---- .../org/apache/spark/examples/mllib/ChiSqSelectorExample.scala | 2 +- .../apache/spark/examples/mllib/ElementwiseProductExample.scala | 4 ++-- .../scala/org/apache/spark/examples/mllib/NormalizerExample.scala | 4 ++-- .../org/apache/spark/examples/mllib/StandardScalerExample.scala | 4 ++-- .../main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala index c6312d7..a606cc4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala @@ -59,13 +59,13 @@ object BinaryClassificationMetricsExample { // Precision by threshold val precision = metrics.precisionByThreshold - precision.foreach { case (t, p) => + precision.collect.foreach { case (t, p) => println(s"Threshold: $t, Precision: $p") } // Recall by threshold val recall = metrics.recallByThreshold - recall.foreach { case (t, r) => + recall.collect.foreach { case (t, r) => println(s"Threshold: $t, Recall: $r") } @@ -74,13 +74,13 @@ object BinaryClassificationMetricsExample { // F-measure val f1Score = metrics.fMeasureByThreshold - f1Score.foreach { case (t, f) => + f1Score.collect.foreach { case (t, f) => println(s"Threshold: $t, F-score: $f, Beta = 1") } val beta = 0.5 val fScore = metrics.fMeasureByThreshold(beta) - f1Score.foreach { case (t, f) => + fScore.collect.foreach { case (t, f) => println(s"Threshold: $t, 
F-score: $f, Beta = 0.5") } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala index 5e400b7..6ed59a3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala @@ -53,7 +53,7 @@ object ChiSqSelectorExample { // $example off$ println("filtered data: ") - filteredData.foreach(x => println(x)) + filteredData.collect.foreach(x => println(x)) sc.stop() } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala index 1855058..d6ec678 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala @@ -45,10 +45,10 @@ object ElementwiseProductExample { // $example off$ println("transformedData: ") - transformedData.foreach(x => println(x)) + transformedData.collect.foreach(x => println(x)) println("transformedData2: ") - transformedData2.foreach(x => println(x)) + transformedData2.collect.foreach(x => println(x)) sc.stop() } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala index b3a9604..b1cad7b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala @@ -46,10 +46,10 @@ object NormalizerExample { // $example off$ println("data1: ") - data1.foreach(x => println(x)) + data1.collect.foreach(x => println(x)) println("data2: ") - data2.foreach(x => println(x)) + data2.collect.foreach(x => println(x)) sc.stop() } diff 
--git a/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala index 769fc17..66a608c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala @@ -49,10 +49,10 @@ object StandardScalerExample { // $example off$ println("data1: ") - data1.foreach(x => println(x)) + data1.collect.foreach(x => println(x)) println("data2: ") - data2.foreach(x => println(x)) + data2.collect.foreach(x => println(x)) sc.stop() } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala index a5bdcd8..14b2a20 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala @@ -55,10 +55,10 @@ object TFIDFExample { // $example off$ println("tfidf: ") - tfidf.foreach(x => println(x)) + tfidf.collect.foreach(x => println(x)) println("tfidfIgnore: ") - tfidfIgnore.foreach(x => println(x)) + tfidfIgnore.collect.foreach(x => println(x)) sc.stop() } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org