This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 71dcf66  [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
71dcf66 is described below

commit 71dcf6691a48dd622b83e128aa9be30f757b45ec
Author: Kengo Seki <sek...@apache.org>
AuthorDate: Sun Mar 29 08:48:08 2020 +0900

    [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability

    ### What changes were proposed in this pull request?

    This PR replaces calls to `toSet.toSeq` with `distinct`.

    ### Why are the changes needed?

    `toSet.toSeq` is intended to make the elements of a collection unique, but it is a bit verbose. Using `distinct` instead is easier to understand and improves readability.

    ### Does this PR introduce any user-facing change?

    No

    ### How was this patch tested?

    Tested with the existing unit tests and found no problem.

    Closes #28062 from sekikn/SPARK-31292.

    Authored-by: Kengo Seki <sek...@apache.org>
    Signed-off-by: Takeshi Yamamuro <yamam...@apache.org>
    (cherry picked from commit 0b237bd615da4b2c2b781e72af4ad3a4f2951444)
    Signed-off-by: Takeshi Yamamuro <yamam...@apache.org>
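As a quick editorial illustration before the patch body (a minimal sketch, not part of the patch; the host names are made up): both spellings deduplicate, but `distinct` additionally preserves the first-occurrence order of the original `Seq`, whereas `toSet.toSeq` routes through an unordered `Set` and guarantees no particular order. Since `toSet.toSeq` never guaranteed an order, switching to `distinct` should be behaviorally safe at these call sites.

```scala
// Sketch only: contrasts the two deduplication spellings on made-up host names.
val hosts = Seq("host2", "host1", "host2", "host3", "host1")

// Unique elements, but in no guaranteed order (Set is unordered).
val viaSet: Seq[String] = hosts.toSet.toSeq

// Unique elements in first-occurrence order: List(host2, host1, host3).
val viaDistinct: Seq[String] = hosts.distinct

// Same elements either way, so deduplication behavior is unchanged.
assert(viaSet.sorted == viaDistinct.sorted)
```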
---
 core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala      | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala        | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala    | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +-
 .../test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala             | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
index 7dd7fc1..994b363 100644
--- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
+++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
@@ -149,7 +149,7 @@ private[spark] object ResourceUtils extends Logging {
   def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = {
     sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) =>
       key.substring(0, key.indexOf('.'))
-    }.toSet.toSeq.map(name => new ResourceID(componentName, name))
+    }.distinct.map(name => new ResourceID(componentName, name))
   }

   def parseAllResourceRequests(
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
index 857c89d..15f2161 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
@@ -69,7 +69,7 @@ private[spark] class ResultTask[T, U](
     with Serializable {

   @transient private[this] val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }

   override def runTask(context: TaskContext): U = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 4c0c30a..a0ba920 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -71,7 +71,7 @@ private[spark] class ShuffleMapTask(
   }

   @transient private val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }

   override def runTask(context: TaskContext): MapStatus = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 6a1d460..ed30473 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -408,7 +408,7 @@ private[spark] class TaskSchedulerImpl(
         newExecAvail = true
       }
     }
-    val hosts = offers.map(_.host).toSet.toSeq
+    val hosts = offers.map(_.host).distinct
     for ((host, Some(rack)) <- hosts.zip(getRacksForHosts(hosts))) {
       hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
     }
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
index e7ecf84..a083cdb 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
@@ -758,7 +758,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
     // that are explicitly blacklisted, plus those that have *any* executors blacklisted.
     val nodesForBlacklistedExecutors = offers.filter { offer =>
       execBlacklist.contains(offer.executorId)
-    }.map(_.host).toSet.toSeq
+    }.map(_.host).distinct
     val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet
     // Similarly, figure out which executors have any blacklisting. This means all executors
     // that are explicitly blacklisted, plus all executors on nodes that are blacklisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d85e23b..b910136 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2455,7 +2455,7 @@ class Dataset[T] private[sql](
   def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan {
     val resolver = sparkSession.sessionState.analyzer.resolver
     val allColumns = queryExecution.analyzed.output
-    val groupCols = colNames.toSet.toSeq.flatMap { (colName: String) =>
+    val groupCols = colNames.distinct.flatMap { (colName: String) =>
      // It is possibly there are more than one columns with the same name,
      // so we call filter instead of find.
      val cols = allColumns.filter(col => resolver(col.name, colName))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org