This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 78bf448  [SPARK-34275][CORE][SQL][MLLIB][2.4] Replaces filter and size with count

78bf448 is described below

commit 78bf4480dc4ceb8723b4c425952d8169c8043e5e
Author: yangjie01 <yangji...@baidu.com>
AuthorDate: Thu Jan 28 19:01:45 2021 +0900

[SPARK-34275][CORE][SQL][MLLIB][2.4] Replaces filter and size with count

### What changes were proposed in this pull request?
Use `count` to simplify the `filter + size` (or `length`) pattern; the two are semantically equivalent, but `count` reads more simply.

**Before**
```
seq.filter(p).size
```

**After**
```
seq.count(p)
```

### Why are the changes needed?
Code simplification.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Pass the existing Jenkins or GitHub Actions checks.

Closes #31376 from LuciferYang/SPARK-34275-24.

Authored-by: yangjie01 <yangji...@baidu.com>
Signed-off-by: HyukjinKwon <gurwls...@apache.org>
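As a standalone illustration of the equivalence this patch relies on, here is a minimal sketch; the `labels` sequence and its values are hypothetical and not taken from the modified tests:

```scala
// Both expressions yield the number of elements satisfying the predicate;
// count does it in one pass without allocating the intermediate collection.
val labels = Seq("pos", "neg", "pos", "pos")
val viaFilter = labels.filter(_ == "pos").size  // builds Seq("pos", "pos", "pos"), then takes its size
val viaCount = labels.count(_ == "pos")         // counts matching elements directly
assert(viaFilter == viaCount && viaCount == 3)
```

Besides reading more directly, `count` folds the predicate into a single traversal instead of materializing the filtered collection first, which is why the rewrite is a safe simplification in the tests below.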
---
 core/src/test/scala/org/apache/spark/SparkContextSuite.scala       | 4 ++--
 .../scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala | 4 ++--
 .../scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index e1666a3..663306a 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -161,7 +161,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
         }
         x
       }).count()
-      assert(sc.listFiles().filter(_.contains("somesuffix1")).size == 1)
+      assert(sc.listFiles().count(_.contains("somesuffix1")) == 1)
     } finally {
       sc.stop()
     }
@@ -172,7 +172,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     try {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
       sc.addJar(jarPath.toString)
-      assert(sc.listJars().filter(_.contains("TestUDTF.jar")).size == 1)
+      assert(sc.listJars().count(_.contains("TestUDTF.jar")) == 1)
     } finally {
       sc.stop()
     }

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
index 5f9ab98..603b092 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
@@ -56,10 +56,10 @@ class NaiveBayesSuite extends MLTest with DefaultReadWriteTest {
   }

   def validatePrediction(predictionAndLabels: Seq[Row]): Unit = {
-    val numOfErrorPredictions = predictionAndLabels.filter {
+    val numOfErrorPredictions = predictionAndLabels.count {
       case Row(prediction: Double, label: Double) =>
         prediction != label
-    }.length
+    }
     // At least 80% of the predictions should be on.
     assert(numOfErrorPredictions < predictionAndLabels.length / 5)
   }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
index c5fb173..067169d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
@@ -147,9 +147,9 @@ class TypedImperativeAggregateSuite extends QueryTest with SharedSQLContext {
     val query = df.select(typedMax($"key"), count($"key"), typedMax($"value"), count($"value"))

     val maxKey = nullableData.map(_._1).filter(_ != null).max
-    val countKey = nullableData.map(_._1).filter(_ != null).size
+    val countKey = nullableData.map(_._1).count(_ != null)
     val maxValue = nullableData.map(_._2).filter(_ != null).max
-    val countValue = nullableData.map(_._2).filter(_ != null).size
+    val countValue = nullableData.map(_._2).count(_ != null)
     val expected = Seq(Row(maxKey, countKey, maxValue, countValue))
     checkAnswer(query, expected)
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org