This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b6ac331 [MINOR][DOCS] Fix typos in python user guide and "the the" in the whole codebase b6ac331 is described below commit b6ac3311b70d7fdb373e88b8617c74dda63e1c8f Author: sudoliyang <sudo.liy...@gmail.com> AuthorDate: Tue Nov 9 13:54:27 2021 +0900 [MINOR][DOCS] Fix typos in python user guide and "the the" in the whole codebase ### What changes were proposed in this pull request? Fix typos in python user guide and "the the" in the whole codebase. ### Why are the changes needed? Improve readability. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tested by checking dictionary. Closes #34524 from sudoliyang/master. Authored-by: sudoliyang <sudo.liy...@gmail.com> Signed-off-by: Kousuke Saruta <saru...@oss.nttdata.com> --- core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +- python/docs/source/user_guide/pandas_on_spark/typehints.rst | 2 +- python/docs/source/user_guide/python_packaging.rst | 2 +- python/pyspark/rdd.py | 2 +- .../sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala | 2 +- .../test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index ae50a45..55db73a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -610,7 +610,7 @@ private[spark] class TaskSchedulerImpl( taskSet.getCompletelyExcludedTaskIfAny(hostToExecutors).foreach { taskIndex => // If the taskSet is unschedulable we try to find an existing idle excluded // executor and kill the idle executor and kick off an abortTimer which if it doesn't - // schedule a task within the the timeout will abort the taskSet if we were 
unable to + // schedule a task within the timeout will abort the taskSet if we were unable to // schedule any task from the taskSet. // Note 1: We keep track of schedulability on a per taskSet basis rather than on a per // task basis. diff --git a/python/docs/source/user_guide/pandas_on_spark/typehints.rst b/python/docs/source/user_guide/pandas_on_spark/typehints.rst index 72519fc..fda400d 100644 --- a/python/docs/source/user_guide/pandas_on_spark/typehints.rst +++ b/python/docs/source/user_guide/pandas_on_spark/typehints.rst @@ -91,7 +91,7 @@ plans to move gradually towards using pandas instances only as the stability bec Type Hinting with Names ----------------------- -This apporach is to overcome the limitations in the existing type +This approach is to overcome the limitations in the existing type hinting especially for DataFrame. When you use a DataFrame as the return type hint, for example, ``DataFrame[int, int]``, there is no way to specify the names of each Series. In the old way, pandas API on Spark just generates the column names as ``c#`` and this easily leads users to lose or forgot the Series mappings. See the example below: diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst index 6409c5f..8a60177 100644 --- a/python/docs/source/user_guide/python_packaging.rst +++ b/python/docs/source/user_guide/python_packaging.rst @@ -249,5 +249,5 @@ For the interactive pyspark shell, the commands are almost the same: An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX can be found `here <https://github.com/criteo/cluster-pack/blob/master/examples/spark-with-S3/README.md>`_ -- it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having +- it uses cluster-pack, a library on top of PEX that automatizes the intermediate step of having to create & upload the PEX manually. 
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 2f0db7f..6942634 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2797,7 +2797,7 @@ class RDD(object): Returns ------- :py:class:`pyspark.resource.ResourceProfile` - The the user specified profile or None if none were specified + The user specified profile or None if none were specified Notes ----- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala index 982e428..96c36dd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala @@ -254,7 +254,7 @@ class BasicWriteTaskStatsTrackerSuite extends SparkFunSuite { /** * Any FS which supports XAttr must raise an FNFE if the * file is missing. This verifies resilience on a path - * which the the local FS would not normally take. + * which the local FS would not normally take. */ test("Missing File with XAttr") { val missing = new Path(tempDirPath, "missing") diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 2ef2700..628167a 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -76,7 +76,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { * `hive.metastore.warehouse.dir`. * @param useExternalHiveFile whether to load the hive-site.xml from `src/test/noclasspath` or * not, disabled by default - * @param metastore which path the embedded derby database for metastore locates. 
Use the the + * @param metastore which path the embedded derby database for metastore locates. Use the * global `metastorePath` by default * @param queriesAndExpectedAnswers one or more tuples of query + answer */ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org