This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 09564df8485 [SPARK-39147][SQL] Code simplification, use count() instead of filter().size, etc 09564df8485 is described below commit 09564df8485d4ba27ba6d77b18a4635038ab2a1e Author: morvenhuang <morven.hu...@gmail.com> AuthorDate: Wed May 11 18:27:29 2022 -0500 [SPARK-39147][SQL] Code simplification, use count() instead of filter().size, etc ### What changes were proposed in this pull request? Use count() instead of filter().size, use df.count() instead of df.collect().size. ### Why are the changes needed? Code simplification. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #36507 from morvenhuang/SPARK-39147. Authored-by: morvenhuang <morven.hu...@gmail.com> Signed-off-by: Sean Owen <sro...@gmail.com> --- core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala | 2 +- .../org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala | 4 ++-- .../scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala index fe76b1bc322..cf2240a0511 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -263,7 +263,7 @@ class MapStatusSuite extends SparkFunSuite { val allBlocks = emptyBlocks ++: nonEmptyBlocks val skewThreshold = Utils.median(allBlocks, false) * accurateBlockSkewedFactor - assert(nonEmptyBlocks.filter(_ > skewThreshold).size == + assert(nonEmptyBlocks.count(_ > skewThreshold) == untrackedSkewedBlocksLength + trackedSkewedBlocksLength, "number of skewed block sizes") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index 3c5ab55a8a7..737d30a41d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -132,8 +132,8 @@ object StreamingJoinHelper extends PredicateHelper with Logging { leftExpr.collect { case a: AttributeReference => a } ++ rightExpr.collect { case a: AttributeReference => a } ) - if (attributesInCondition.filter { attributesToFindStateWatermarkFor.contains(_) }.size > 1 || - attributesInCondition.filter { attributesWithEventWatermark.contains(_) }.size > 1) { + if (attributesInCondition.count(attributesToFindStateWatermarkFor.contains) > 1 || + attributesInCondition.count(attributesWithEventWatermark.contains) > 1) { // If more than attributes present in condition from one side, then it cannot be solved return None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 8971f0c70af..d8081f4525a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -622,7 +622,7 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { // To be conservative here: it's only a guaranteed win if all but at most only one branch // end up being not foldable. private def atMostOneUnfoldable(exprs: Seq[Expression]): Boolean = { - exprs.filterNot(_.foldable).size < 2 + exprs.count(!_.foldable) < 2 } // Not all UnaryExpression can be pushed into (if / case) branches, e.g. Alias. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org