[spark] branch master updated: [SPARK-39147][SQL] Code simplification, use count() instead of filter().size, etc

srowen Wed, 11 May 2022 16:27:45 -0700

This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 09564df8485 [SPARK-39147][SQL] Code simplification, use count() instead of filter().size, etc
09564df8485 is described below

commit 09564df8485d4ba27ba6d77b18a4635038ab2a1e
Author: morvenhuang <morven.hu...@gmail.com>
AuthorDate: Wed May 11 18:27:29 2022 -0500

    [SPARK-39147][SQL] Code simplification, use count() instead of filter().size, etc
    
    ### What changes were proposed in this pull request?
    Use count() instead of filter().size, use df.count() instead of df.collect().size.
    
    ### Why are the changes needed?
    Code simplification.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Pass GA.
    
    Closes #36507 from morvenhuang/SPARK-39147.
    
    Authored-by: morvenhuang <morven.hu...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala   | 2 +-
 .../org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala  | 4 ++--
 .../scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala
index fe76b1bc322..cf2240a0511 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala
@@ -263,7 +263,7 @@ class MapStatusSuite extends SparkFunSuite {
     val allBlocks = emptyBlocks ++: nonEmptyBlocks
 
     val skewThreshold = Utils.median(allBlocks, false) * accurateBlockSkewedFactor
-    assert(nonEmptyBlocks.filter(_ > skewThreshold).size ==
+    assert(nonEmptyBlocks.count(_ > skewThreshold) ==
       untrackedSkewedBlocksLength + trackedSkewedBlocksLength,
       "number of skewed block sizes")
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala
index 3c5ab55a8a7..737d30a41d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala
@@ -132,8 +132,8 @@ object StreamingJoinHelper extends PredicateHelper with Logging {
       leftExpr.collect { case a: AttributeReference => a } ++
       rightExpr.collect { case a: AttributeReference => a }
     )
-    if (attributesInCondition.filter { attributesToFindStateWatermarkFor.contains(_) }.size > 1 ||
-        attributesInCondition.filter { attributesWithEventWatermark.contains(_) }.size > 1) {
+    if (attributesInCondition.count(attributesToFindStateWatermarkFor.contains) > 1 ||
+        attributesInCondition.count(attributesWithEventWatermark.contains) > 1) {
       // If more than attributes present in condition from one side, then it cannot be solved
       return None
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 8971f0c70af..d8081f4525a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -622,7 +622,7 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper {
   // To be conservative here: it's only a guaranteed win if all but at most only one branch
   // end up being not foldable.
   private def atMostOneUnfoldable(exprs: Seq[Expression]): Boolean = {
-    exprs.filterNot(_.foldable).size < 2
+    exprs.count(!_.foldable) < 2
   }
 
   // Not all UnaryExpression can be pushed into (if / case) branches, e.g. Alias.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-39147][SQL] Code simplification, use count() instead of filter().size, etc

Reply via email to