This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 71dcf66  [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
71dcf66 is described below

commit 71dcf6691a48dd622b83e128aa9be30f757b45ec
Author: Kengo Seki <sek...@apache.org>
AuthorDate: Sun Mar 29 08:48:08 2020 +0900

    [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability

    ### What changes were proposed in this pull request?

    This PR replaces calls to `toSet.toSeq` with `distinct`.

    ### Why are the changes needed?

    `toSet.toSeq` is intended to make the elements of a collection unique, but it is a bit verbose. Using `distinct` instead is easier to understand and improves readability.

    ### Does this PR introduce any user-facing change?

    No

    ### How was this patch tested?

    Tested with the existing unit tests and found no problem.

    Closes #28062 from sekikn/SPARK-31292.

    Authored-by: Kengo Seki <sek...@apache.org>
    Signed-off-by: Takeshi Yamamuro <yamam...@apache.org>
    (cherry picked from commit 0b237bd615da4b2c2b781e72af4ad3a4f2951444)
    Signed-off-by: Takeshi Yamamuro <yamam...@apache.org>
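As a quick editorial illustration before the patch body (a minimal sketch, not part of the patch; the host names are made up): both spellings deduplicate, but `distinct` additionally preserves the first-occurrence order of the original `Seq`, whereas `toSet.toSeq` routes through an unordered `Set` and guarantees no particular order. Since `toSet.toSeq` never guaranteed an order, switching to `distinct` should be behaviorally safe at these call sites.

```scala
// Sketch only: contrasts the two deduplication spellings on made-up host names.
val hosts = Seq("host2", "host1", "host2", "host3", "host1")

// Unique elements, but in no guaranteed order (Set is unordered).
val viaSet: Seq[String] = hosts.toSet.toSeq

// Unique elements in first-occurrence order: List(host2, host1, host3).
val viaDistinct: Seq[String] = hosts.distinct

// Same elements either way, so deduplication behavior is unchanged.
assert(viaSet.sorted == viaDistinct.sorted)
```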
---
 core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala      | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala        | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala    | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +-
 .../test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala             | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
index 7dd7fc1..994b363 100644
--- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
+++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
@@ -149,7 +149,7 @@ private[spark] object ResourceUtils extends Logging {
   def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = {
     sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) =>
       key.substring(0, key.indexOf('.'))
-    }.toSet.toSeq.map(name => new ResourceID(componentName, name))
+    }.distinct.map(name => new ResourceID(componentName, name))
   }

   def parseAllResourceRequests(
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
index 857c89d..15f2161 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
@@ -69,7 +69,7 @@ private[spark] class ResultTask[T, U](
     with Serializable {

   @transient private[this] val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }

   override def runTask(context: TaskContext): U = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 4c0c30a..a0ba920 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -71,7 +71,7 @@ private[spark] class ShuffleMapTask(
   }

   @transient private val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }

   override def runTask(context: TaskContext): MapStatus = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 6a1d460..ed30473 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -408,7 +408,7 @@ private[spark] class TaskSchedulerImpl(
         newExecAvail = true
       }
     }
-    val hosts = offers.map(_.host).toSet.toSeq
+    val hosts = offers.map(_.host).distinct
     for ((host, Some(rack)) <- hosts.zip(getRacksForHosts(hosts))) {
       hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
     }
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
index e7ecf84..a083cdb 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
@@ -758,7 +758,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
     // that are explicitly blacklisted, plus those that have *any* executors blacklisted.
     val nodesForBlacklistedExecutors = offers.filter { offer =>
       execBlacklist.contains(offer.executorId)
-    }.map(_.host).toSet.toSeq
+    }.map(_.host).distinct
     val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet
     // Similarly, figure out which executors have any blacklisting. This means all executors
     // that are explicitly blacklisted, plus all executors on nodes that are blacklisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d85e23b..b910136 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2455,7 +2455,7 @@ class Dataset[T] private[sql](
   def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan {
     val resolver = sparkSession.sessionState.analyzer.resolver
     val allColumns = queryExecution.analyzed.output
-    val groupCols = colNames.toSet.toSeq.flatMap { (colName: String) =>
+    val groupCols = colNames.distinct.flatMap { (colName: String) =>
      // It is possibly there are more than one columns with the same name,
      // so we call filter instead of find.
      val cols = allColumns.filter(col => resolver(col.name, colName))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org