Repository: spark Updated Branches: refs/heads/branch-2.0 7d10e4bdd -> 90e94b826
[MINOR] Fix Typos 'an -> a' ## What changes were proposed in this pull request? `an -> a` Use cmds like `find . -name '*.R' | xargs -i sh -c "grep -in ' an [^aeiou]' {} && echo {}"` to generate candidates, and review them one by one. ## How was this patch tested? manual tests Author: Zheng RuiFeng <ruife...@foxmail.com> Closes #13515 from zhengruifeng/an_a. (cherry picked from commit fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27) Signed-off-by: Sean Owen <so...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90e94b82 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90e94b82 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90e94b82 Branch: refs/heads/branch-2.0 Commit: 90e94b82649d9816cd4065549678b82751238552 Parents: 7d10e4b Author: Zheng RuiFeng <ruife...@foxmail.com> Authored: Mon Jun 6 09:35:47 2016 +0100 Committer: Sean Owen <so...@cloudera.com> Committed: Mon Jun 6 09:35:57 2016 +0100 ---------------------------------------------------------------------- R/pkg/R/utils.R | 2 +- .../src/main/scala/org/apache/spark/Accumulable.scala | 2 +- .../org/apache/spark/api/java/JavaSparkContext.scala | 2 +- .../scala/org/apache/spark/api/python/PythonRDD.scala | 2 +- .../scala/org/apache/spark/deploy/SparkSubmit.scala | 6 +++--- .../src/main/scala/org/apache/spark/rdd/JdbcRDD.scala | 6 +++--- .../main/scala/org/apache/spark/scheduler/Pool.scala | 2 +- .../org/apache/spark/broadcast/BroadcastSuite.scala | 2 +- .../spark/deploy/rest/StandaloneRestSubmitSuite.scala | 2 +- .../test/scala/org/apache/spark/rpc/RpcEnvSuite.scala | 2 +- .../apache/spark/scheduler/DAGSchedulerSuite.scala | 4 ++-- .../org/apache/spark/util/JsonProtocolSuite.scala | 2 +- .../spark/streaming/flume/FlumeBatchFetcher.scala | 2 +- .../spark/graphx/impl/VertexPartitionBaseOps.scala | 2 +- .../scala/org/apache/spark/ml/linalg/Vectors.scala | 2 +- .../src/main/scala/org/apache/spark/ml/Pipeline.scala | 2 +- .../spark/ml/classification/LogisticRegression.scala | 4 ++-- .../org/apache/spark/ml/tree/impl/RandomForest.scala | 2 +- .../mllib/classification/LogisticRegression.scala | 2 +- .../org/apache/spark/mllib/classification/SVM.scala | 2 +- .../spark/mllib/feature/VectorTransformer.scala | 2 +- .../scala/org/apache/spark/mllib/linalg/Vectors.scala | 2 +- .../mllib/linalg/distributed/CoordinateMatrix.scala | 2 +- .../apache/spark/mllib/rdd/MLPairRDDFunctions.scala | 2 +- python/pyspark/ml/classification.py | 4 ++-- python/pyspark/ml/pipeline.py | 2 +- python/pyspark/mllib/classification.py | 2 +- python/pyspark/mllib/common.py | 2 +- python/pyspark/rdd.py | 4 ++-- python/pyspark/sql/session.py | 2 +- python/pyspark/sql/streaming.py | 2 +- python/pyspark/sql/types.py | 2 +- python/pyspark/streaming/dstream.py | 4 ++-- .../src/main/scala/org/apache/spark/sql/Row.scala | 2 +- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- .../sql/catalyst/analysis/FunctionRegistry.scala | 2 +- .../sql/catalyst/analysis/MultiInstanceRelation.scala | 2 +- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 6 +++--- .../sql/catalyst/catalog/functionResources.scala | 2 +- .../sql/catalyst/expressions/ExpectsInputTypes.scala | 2 +- .../spark/sql/catalyst/expressions/Projection.scala | 4 ++-- .../sql/catalyst/expressions/complexTypeCreator.scala | 2 +- .../org/apache/spark/sql/types/AbstractDataType.scala | 2 +- .../scala/org/apache/spark/sql/DataFrameReader.scala | 2 +- .../main/scala/org/apache/spark/sql/SQLContext.scala | 14 +++++++------- .../scala/org/apache/spark/sql/SQLImplicits.scala | 2 +- .../scala/org/apache/spark/sql/SparkSession.scala | 14 +++++++------- .../org/apache/spark/sql/catalyst/SQLBuilder.scala | 2 +- .../aggregate/SortBasedAggregationIterator.scala | 2 +- .../apache/spark/sql/execution/aggregate/udaf.scala | 2 +- .../execution/columnar/GenerateColumnAccessor.scala | 2 +- .../execution/datasources/FileSourceStrategy.scala | 2 +- .../execution/datasources/json/JacksonParser.scala | 2 +- .../datasources/parquet/CatalystRowConverter.scala | 2 +- .../sql/execution/exchange/ExchangeCoordinator.scala | 10 +++++----- .../spark/sql/execution/joins/SortMergeJoinExec.scala | 2 +- .../spark/sql/execution/r/MapPartitionsRWrapper.scala | 2 +- .../scala/org/apache/spark/sql/expressions/udaf.scala | 2 +- .../org/apache/spark/sql/internal/SharedState.scala | 2 +- .../apache/spark/sql/streaming/ContinuousQuery.scala | 2 +- .../org/apache/spark/sql/hive/client/HiveClient.scala | 2 +- .../apache/spark/sql/hive/orc/OrcFileOperator.scala | 2 +- .../spark/sql/hive/execution/HiveComparisonTest.scala | 2 +- .../main/scala/org/apache/spark/streaming/State.scala | 2 +- .../streaming/api/java/JavaStreamingContext.scala | 6 +++--- .../org/apache/spark/streaming/dstream/DStream.scala | 4 ++-- .../spark/streaming/receiver/ReceivedBlock.scala | 2 +- .../scheduler/ReceiverSchedulingPolicy.scala | 2 +- .../org/apache/spark/streaming/CheckpointSuite.scala | 2 +- .../scheduler/rate/PIDRateEstimatorSuite.scala | 2 +- 70 files changed, 102 insertions(+), 102 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/R/pkg/R/utils.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index e734366..12e4f4f 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -489,7 +489,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) { # checkedFunc An environment of function objects examined during cleanClosure. It can be # considered as a "name"-to-"list of functions" mapping. # return value -# a new version of func that has an correct environment (closure). +# a new version of func that has a correct environment (closure). cleanClosure <- function(func, checkedFuncs = new.env()) { if (is.function(func)) { newEnv <- new.env(parent = .GlobalEnv) http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/main/scala/org/apache/spark/Accumulable.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/Accumulable.scala b/core/src/main/scala/org/apache/spark/Accumulable.scala index 812145a..5532931 100644 --- a/core/src/main/scala/org/apache/spark/Accumulable.scala +++ b/core/src/main/scala/org/apache/spark/Accumulable.scala @@ -28,7 +28,7 @@ import org.apache.spark.util.{AccumulatorContext, AccumulatorMetadata, LegacyAcc /** - * A data type that can be accumulated, i.e. has an commutative and associative "add" operation, + * A data type that can be accumulated, i.e. has a commutative and associative "add" operation, * but where the result type, `R`, may be different from the element type being added, `T`. * * You must define how to add data, and how to merge two of these together. For some data types, http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index bfb6a35..485a8b4 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -789,7 +789,7 @@ class JavaSparkContext(val sc: SparkContext) def cancelAllJobs(): Unit = sc.cancelAllJobs() /** - * Returns an Java map of JavaRDDs that have marked themselves as persistent via cache() call. + * Returns a Java map of JavaRDDs that have marked themselves as persistent via cache() call. * Note that this does not necessarily mean the caching or computation was successful. */ def getPersistentRDDs: JMap[java.lang.Integer, JavaRDD[_]] = { http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index ab5b6c8..2822eb5 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -919,7 +919,7 @@ private class PythonAccumulatorParam(@transient private val serverHost: String, } /** - * An Wrapper for Python Broadcast, which is written into disk by Python. It also will + * A Wrapper for Python Broadcast, which is written into disk by Python. It also will * write the data into disk after deserialization, then Python can read it from disks. */ // scalastyle:off no.finalize http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 78606e0..9be4cad 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -408,12 +408,12 @@ object SparkSubmit { printErrorAndExit("SparkR is not supported for Mesos cluster.") } - // If we're running a R app, set the main class to our specific R runner + // If we're running an R app, set the main class to our specific R runner if (args.isR && deployMode == CLIENT) { if (args.primaryResource == SPARKR_SHELL) { args.mainClass = "org.apache.spark.api.r.RBackend" } else { - // If a R file is provided, add it to the child arguments and list of files to deploy. + // If an R file is provided, add it to the child arguments and list of files to deploy. // Usage: RRunner <main R file> [app arguments] args.mainClass = "org.apache.spark.deploy.RRunner" args.childArgs = ArrayBuffer(args.primaryResource) ++ args.childArgs @@ -422,7 +422,7 @@ object SparkSubmit { } if (isYarnCluster && args.isR) { - // In yarn-cluster mode for a R app, add primary resource to files + // In yarn-cluster mode for an R app, add primary resource to files // that can be distributed with the job args.files = mergeFileLists(args.files, args.primaryResource) } http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index 5426bf8..2f42916 100644 --- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -34,7 +34,7 @@ private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) e // TODO: Expose a jdbcRDD function in SparkContext and mark this as semi-private /** - * An RDD that executes an SQL query on a JDBC connection and reads results. + * An RDD that executes a SQL query on a JDBC connection and reads results. * For usage example, see test case JdbcRDDSuite. * * @param getConnection a function that returns an open Connection. @@ -138,7 +138,7 @@ object JdbcRDD { } /** - * Create an RDD that executes an SQL query on a JDBC connection and reads results. + * Create an RDD that executes a SQL query on a JDBC connection and reads results. * For usage example, see test case JavaAPISuite.testJavaJdbcRDD. * * @param connectionFactory a factory that returns an open Connection. @@ -178,7 +178,7 @@ object JdbcRDD { } /** - * Create an RDD that executes an SQL query on a JDBC connection and reads results. Each row is + * Create an RDD that executes a SQL query on a JDBC connection and reads results. Each row is * converted into a `Object` array. For usage example, see test case JavaAPISuite.testJavaJdbcRDD. * * @param connectionFactory a factory that returns an open Connection. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/main/scala/org/apache/spark/scheduler/Pool.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala index 5987cfe..266afaf 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala @@ -26,7 +26,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.SchedulingMode.SchedulingMode /** - * An Schedulable entity that represents collection of Pools or TaskSetManagers + * A Schedulable entity that represents collection of Pools or TaskSetManagers */ private[spark] class Pool( val poolName: String, http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala index 6657104..9736763 100644 --- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala @@ -138,7 +138,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { } /** - * Verify the persistence of state associated with an TorrentBroadcast in a local-cluster. + * Verify the persistence of state associated with a TorrentBroadcast in a local-cluster. * * This test creates a broadcast variable, uses it on all executors, and then unpersists it. * In between each step, this test verifies that the broadcast blocks are present only on the http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala index a7bb9aa..dd50e33 100644 --- a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala @@ -408,7 +408,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite with BeforeAndAfterEach { /** * Start a [[StandaloneRestServer]] that communicates with the given endpoint. - * If `faulty` is true, start an [[FaultyStandaloneRestServer]] instead. + * If `faulty` is true, start a [[FaultyStandaloneRestServer]] instead. * Return the master URL that corresponds to the address of this server. */ private def startServer( http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 505cd47..acdf21d 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -489,7 +489,7 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { /** * Setup an [[RpcEndpoint]] to collect all network events. * - * @return the [[RpcEndpointRef]] and an `ConcurrentLinkedQueue` that contains network events. + * @return the [[RpcEndpointRef]] and a `ConcurrentLinkedQueue` that contains network events. */ private def setupNetworkEndpoint( _env: RpcEnv, http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 088a476..5bcc8ff 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -1703,7 +1703,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou } test("reduce tasks should be placed locally with map output") { - // Create an shuffleMapRdd with 1 partition + // Create a shuffleMapRdd with 1 partition val shuffleMapRdd = new MyRDD(sc, 1, Nil) val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2)) val shuffleId = shuffleDep.shuffleId @@ -1724,7 +1724,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou test("reduce task locality preferences should only include machines with largest map outputs") { val numMapTasks = 4 - // Create an shuffleMapRdd with more partitions + // Create a shuffleMapRdd with more partitions val shuffleMapRdd = new MyRDD(sc, numMapTasks, Nil) val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1)) val shuffleId = shuffleDep.shuffleId http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 6fda737..7a1ee03 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -258,7 +258,7 @@ class JsonProtocolSuite extends SparkFunSuite { } test("FetchFailed backwards compatibility") { - // FetchFailed in Spark 1.1.0 does not have an "Message" property. + // FetchFailed in Spark 1.1.0 does not have a "Message" property. val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 18, 19, "ignored") val oldEvent = JsonProtocol.taskEndReasonToJson(fetchFailed) http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala ---------------------------------------------------------------------- diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala index 5f234b1..8af7c23 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala @@ -26,7 +26,7 @@ import org.apache.spark.streaming.flume.sink._ /** * This class implements the core functionality of [[FlumePollingReceiver]]. When started it * pulls data from Flume, stores it to Spark and then sends an Ack or Nack. This class should be - * run via an [[java.util.concurrent.Executor]] as this implements [[Runnable]] + * run via a [[java.util.concurrent.Executor]] as this implements [[Runnable]] * * @param receiver The receiver that owns this instance. */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala ---------------------------------------------------------------------- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala index 31373a5..4359457 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala @@ -27,7 +27,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.util.collection.BitSet /** - * An class containing additional operations for subclasses of VertexPartitionBase that provide + * A class containing additional operations for subclasses of VertexPartitionBase that provide * implicit evidence of membership in the `VertexPartitionBaseOpsConstructor` typeclass (for * example, [[VertexPartition.VertexPartitionOpsConstructor]]). */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala ---------------------------------------------------------------------- diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 909fec1..c74e5d4 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -548,7 +548,7 @@ object DenseVector { } /** - * A sparse vector represented by an index array and an value array. + * A sparse vector represented by an index array and a value array. * * @param size size of the vector. * @param indices index array, assume to be strictly increasing. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index b02aea9..25e56d7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -82,7 +82,7 @@ abstract class PipelineStage extends Params with Logging { * be called on the input dataset to fit a model. Then the model, which is a transformer, will be * used to transform the dataset as the input to the next stage. If a stage is a [[Transformer]], * its [[Transformer#transform]] method will be called to produce the dataset for the next stage. - * The fitted model from a [[Pipeline]] is an [[PipelineModel]], which consists of fitted models and + * The fitted model from a [[Pipeline]] is a [[PipelineModel]], which consists of fitted models and * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as * an identity transformer. */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index cecf3c4..1ea4d90 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -854,7 +854,7 @@ class BinaryLogisticRegressionSummary private[classification] ( /** * Returns the receiver operating characteristic (ROC) curve, - * which is an Dataframe having two fields (FPR, TPR) + * which is a Dataframe having two fields (FPR, TPR) * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it. * * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]]. @@ -874,7 +874,7 @@ class BinaryLogisticRegressionSummary private[classification] ( lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC() /** - * Returns the precision-recall curve, which is an Dataframe containing + * Returns the precision-recall curve, which is a Dataframe containing * two fields recall, precision with (0.0, 1.0) prepended to it. * * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]]. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala index be3792e..71c8c42 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala @@ -491,7 +491,7 @@ private[spark] object RandomForest extends Logging { timer.start("chooseSplits") // In each partition, iterate all instances and compute aggregate stats for each node, - // yield an (nodeIndex, nodeAggregateStats) pair for each node. + // yield a (nodeIndex, nodeAggregateStats) pair for each node. // After a `reduceByKey` operation, // stats of a node will be shuffled to a particular partition and be combined together, // then best splits for nodes are found there. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index e4cc784..e4cbf5a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -86,7 +86,7 @@ class LogisticRegressionModel @Since("1.3.0") ( /** * Sets the threshold that separates positive predictions from negative predictions * in Binary Logistic Regression. An example with prediction score greater than or equal to - * this threshold is identified as an positive, and negative otherwise. The default value is 0.5. + * this threshold is identified as a positive, and negative otherwise. The default value is 0.5. * It is only used for binary classification. */ @Since("1.0.0") http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 783c1c8..7c3ccbb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -44,7 +44,7 @@ class SVMModel @Since("1.1.0") ( /** * Sets the threshold that separates positive predictions from negative predictions. An example - * with prediction score greater than or equal to this threshold is identified as an positive, + * with prediction score greater than or equal to this threshold is identified as a positive, * and negative otherwise. The default value is 0.0. */ @Since("1.0.0") http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala index ca73851..9db7250 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala @@ -53,7 +53,7 @@ trait VectorTransformer extends Serializable { } /** - * Applies transformation on an JavaRDD[Vector]. + * Applies transformation on a JavaRDD[Vector]. * * @param data JavaRDD[Vector] to be transformed. * @return transformed JavaRDD[Vector]. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 02fd60d..91f0658 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -731,7 +731,7 @@ object DenseVector { } /** - * A sparse vector represented by an index array and an value array. + * A sparse vector represented by an index array and a value array. * * @param size size of the vector. * @param indices index array, assume to be strictly increasing. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 97b03b3..008b03d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -24,7 +24,7 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} import org.apache.spark.rdd.RDD /** - * Represents an entry in an distributed matrix. + * Represents an entry in a distributed matrix. * @param i row index * @param j column index * @param value value of the entry http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala index af4bc9f..e28e1af 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala @@ -47,7 +47,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se combOp = (queue1, queue2) => { queue1 ++= queue2 } - ).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order. + ).mapValues(_.toArray.sorted(ord.reverse)) // This is a min-heap, so we reverse the order. } } http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/ml/classification.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 7710fdf..77badeb 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -351,7 +351,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary): def roc(self): """ Returns the receiver operating characteristic (ROC) curve, - which is an Dataframe having two fields (FPR, TPR) with + which is a Dataframe having two fields (FPR, TPR) with (0.0, 0.0) prepended and (1.0, 1.0) appended to it. .. seealso:: `Wikipedia reference \ @@ -380,7 +380,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary): @since("2.0.0") def pr(self): """ - Returns the precision-recall curve, which is an Dataframe + Returns the precision-recall curve, which is a Dataframe containing two fields recall, precision with (0.0, 1.0) prepended to it. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/ml/pipeline.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index eb6d331..0777527 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -42,7 +42,7 @@ class Pipeline(Estimator, MLReadable, MLWritable): stage. If a stage is a :py:class:`Transformer`, its :py:meth:`Transformer.transform` method will be called to produce the dataset for the next stage. The fitted model from a - :py:class:`Pipeline` is an :py:class:`PipelineModel`, which + :py:class:`Pipeline` is a :py:class:`PipelineModel`, which consists of fitted models and transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as an identity transformer. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/mllib/classification.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index f186217..3734f87 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -52,7 +52,7 @@ class LinearClassificationModel(LinearModel): Sets the threshold that separates positive predictions from negative predictions. An example with prediction score greater - than or equal to this threshold is identified as an positive, + than or equal to this threshold is identified as a positive, and negative otherwise. It is used for binary classification only. """ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/mllib/common.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 6bc2b1e..31afdf5 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -60,7 +60,7 @@ _picklable_classes = [ # this will call the MLlib version of pythonToJava() def _to_java_object_rdd(rdd): - """ Return an JavaRDD of Object by unpickling + """ Return a JavaRDD of Object by unpickling It will convert each Python object into Java object by Pyrolite, whenever the RDD is serialized in batch or not. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/rdd.py ---------------------------------------------------------------------- diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 411e377..6afe769 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1040,7 +1040,7 @@ class RDD(object): If the elements in RDD do not vary (max == min) always returns a single bucket. - It will return an tuple of buckets and histogram. + It will return a tuple of buckets and histogram. >>> rdd = sc.parallelize(range(51)) >>> rdd.histogram(2) @@ -2211,7 +2211,7 @@ class RDD(object): return values.collect() def _to_java_object_rdd(self): - """ Return an JavaRDD of Object by unpickling + """ Return a JavaRDD of Object by unpickling It will convert each Python object into Java object by Pyrolite, whenever the RDD is serialized in batch or not. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/sql/session.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 8f7dcb5..7c9f532 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -360,7 +360,7 @@ class SparkSession(object): def _createFromLocal(self, data, schema): """ - Create an RDD for DataFrame from an list or pandas.DataFrame, returns + Create an RDD for DataFrame from a list or pandas.DataFrame, returns the RDD and schema. """ # make sure data could consumed multiple times http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/sql/streaming.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index cd75622..580aba6 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -72,7 +72,7 @@ class ContinuousQuery(object): @since(2.0) def processAllAvailable(self): - """Blocks until all available data in the source has been processed an committed to the + """Blocks until all available data in the source has been processed and committed to the sink. This method is intended for testing. Note that in the case of continually arriving data, this method may block forever. Additionally, this method is only guaranteed to block until data that has been synchronously appended data to a stream source prior to invocation. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/sql/types.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 7d8d023..bb2b954 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1046,7 +1046,7 @@ def _need_converter(dataType): def _create_converter(dataType): - """Create an converter to drop the names of fields in obj """ + """Create a converter to drop the names of fields in obj """ if not _need_converter(dataType): return lambda x: x http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/python/pyspark/streaming/dstream.py ---------------------------------------------------------------------- diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index fb6c66f..59977dc 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -608,8 +608,8 @@ class DStream(object): class TransformedDStream(DStream): """ - TransformedDStream is an DStream generated by an Python function - transforming each RDD of an DStream to another RDDs. + TransformedDStream is a DStream generated by an Python function + transforming each RDD of a DStream to another RDDs. Multiple continuous transformations of DStream can be combined into one transformation. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 391001d..d83eef7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -300,7 +300,7 @@ trait Row extends Serializable { getMap[K, V](i).asJava /** - * Returns the value at position i of struct type as an [[Row]] object. + * Returns the value at position i of struct type as a [[Row]] object. * * @throws ClassCastException when data type does not match. */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4f6b483..4446140 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.types._ /** - * A trivial [[Analyzer]] with an dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]]. + * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]]. * Used for testing when all relations are already filled in and the analyzer needs only * to resolve attribute references. */ @@ -1496,7 +1496,7 @@ class Analyzer( * This rule handles three cases: * - A [[Project]] having [[WindowExpression]]s in its projectList; * - An [[Aggregate]] having [[WindowExpression]]s in its aggregateExpressions. - * - An [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING + * - A [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING * clause and the [[Aggregate]] has [[WindowExpression]]s in its aggregateExpressions. * Note: If there is a GROUP BY clause in the query, aggregations and corresponding * filters (expressions in the HAVING clause) should be evaluated before any http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index eca837c..a7388c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -410,7 +410,7 @@ object FunctionRegistry { case Failure(e) => throw new AnalysisException(e.getMessage) } } else { - // Otherwise, find an ctor method that matches the number of arguments, and use that. + // Otherwise, find a constructor method that matches the number of arguments, and use that. val params = Seq.fill(expressions.size)(classOf[Expression]) val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match { case Success(e) => http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala index 394be47..95a3837 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan /** - * A trait that should be mixed into query operators where an single instance might appear multiple + * A trait that should be mixed into query operators where a single instance might appear multiple * times in a logical query plan. It is invalid to have multiple copies of the same attribute * produced by distinct operators in a query tree as this breaks the guarantee that expression * ids, which are used to differentiate attributes, are unique. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 371c198..943d107 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -241,7 +241,7 @@ class SessionCatalog( /** * Retrieve the metadata of an existing metastore table. * If no database is specified, assume the table is in the current database. - * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown. + * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown. */ def getTableMetadata(name: TableIdentifier): CatalogTable = { val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) @@ -266,7 +266,7 @@ class SessionCatalog( /** * Load files stored in given path into an existing metastore table. * If no database is specified, assume the table is in the current database. - * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown. + * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown. */ def loadTable( name: TableIdentifier, @@ -283,7 +283,7 @@ class SessionCatalog( /** * Load files stored in given path into the partition of an existing metastore table. * If no database is specified, assume the table is in the current database. - * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown. + * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown. */ def loadPartition( name: TableIdentifier, http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala index c66d08d..8e46b96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.sql.AnalysisException -/** An trait that represents the type of a resourced needed by a function. */ +/** A trait that represents the type of a resourced needed by a function. */ abstract class FunctionResourceType(val resourceType: String) object JarResource extends FunctionResourceType("jar") http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala index b3dfac8..c15a2df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.types.AbstractDataType /** - * An trait that gets mixin to define the expected input types of an expression. + * A trait that gets mixin to define the expected input types of an expression. * * This trait is typically used by operator expressions (e.g. [[Add]], [[Subtract]]) to define * expected input types without any implicit casting. http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala index 27ad8e4..c8d1866 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala @@ -158,7 +158,7 @@ object UnsafeProjection { object FromUnsafeProjection { /** - * Returns an Projection for given StructType. + * Returns a Projection for given StructType. */ def apply(schema: StructType): Projection = { apply(schema.fields.map(_.dataType)) @@ -172,7 +172,7 @@ object FromUnsafeProjection { } /** - * Returns an Projection for given sequence of Expressions (bounded). + * Returns a Projection for given sequence of Expressions (bounded). */ private def create(exprs: Seq[Expression]): Projection = { GenerateSafeProjection.generate(exprs) http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f60d278..d603d3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -91,7 +91,7 @@ case class CreateMap(children: Seq[Expression]) extends Expression { override def checkInputDataTypes(): TypeCheckResult = { if (children.size % 2 != 0) { - TypeCheckResult.TypeCheckFailure(s"$prettyName expects an positive even number of arguments.") + TypeCheckResult.TypeCheckFailure(s"$prettyName expects a positive even number of arguments.") } else if (keys.map(_.dataType).distinct.length > 1) { TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " + "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]")) http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala index 03ea349..65eae86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala @@ -148,7 +148,7 @@ abstract class NumericType extends AtomicType { // implicitly[Numeric[JvmType]] to be valid, we have to change JvmType from a type variable to a // type parameter and add a numeric annotation (i.e., [JvmType : Numeric]). This gets // desugared by the compiler into an argument to the objects constructor. This means there is no - // longer an no argument constructor and thus the JVM cannot serialize the object anymore. + // longer a no argument constructor and thus the JVM cannot serialize the object anymore. private[sql] val numeric: Numeric[InternalType] } http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 88fa5cd..b248583 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -314,7 +314,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { def json(paths: String*): DataFrame = format("json").load(paths : _*) /** - * Loads an `JavaRDD[String]` storing JSON objects (one object per record) and + * Loads a `JavaRDD[String]` storing JSON objects (one object per record) and * returns the result as a [[DataFrame]]. * * Unless the schema is specified using [[schema]] function, this function goes through the http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 2e14c5d..0fb2400 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -370,7 +370,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s using the given schema. + * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided RDD matches * the provided schema. Otherwise, there will be runtime exception. * @@ -384,7 +384,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[java.util.List]] containing [[Row]]s using the given schema. + * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided List matches * the provided schema. Otherwise, there will be runtime exception. * @@ -421,7 +421,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * Applies a schema to an List of Java Beans. + * Applies a schema to a List of Java Beans. * * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. @@ -552,7 +552,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from 0 to `end` (exclusive) with step value 1. + * in a range from 0 to `end` (exclusive) with step value 1. * * @since 2.0.0 * @group dataset @@ -563,7 +563,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with step value 1. + * in a range from `start` to `end` (exclusive) with step value 1. * * @since 2.0.0 * @group dataset @@ -574,7 +574,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with an step value. + * in a range from `start` to `end` (exclusive) with a step value. * * @since 2.0.0 * @group dataset @@ -587,7 +587,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with an step value, with partition number + * in a range from `start` to `end` (exclusive) with a step value, with partition number * specified. * * @since 2.0.0 http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index b7ea2a8..4409525 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -33,7 +33,7 @@ abstract class SQLImplicits { protected def _sqlContext: SQLContext /** - * Converts $"col name" into an [[Column]]. + * Converts $"col name" into a [[Column]]. * * @since 2.0.0 */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 7d7fd03..f5b16d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -305,7 +305,7 @@ class SparkSession private( /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s using the given schema. + * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided RDD matches * the provided schema. Otherwise, there will be runtime exception. * @@ -319,7 +319,7 @@ class SparkSession private( /** * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[java.util.List]] containing [[Row]]s using the given schema. + * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided List matches * the provided schema. Otherwise, there will be runtime exception. * @@ -365,7 +365,7 @@ class SparkSession private( } /** - * Applies a schema to an List of Java Beans. + * Applies a schema to a List of Java Beans. * * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. @@ -475,7 +475,7 @@ class SparkSession private( /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from 0 to `end` (exclusive) with step value 1. + * in a range from 0 to `end` (exclusive) with step value 1. * * @since 2.0.0 * @group dataset @@ -486,7 +486,7 @@ class SparkSession private( /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with step value 1. + * in a range from `start` to `end` (exclusive) with step value 1. * * @since 2.0.0 * @group dataset @@ -499,7 +499,7 @@ class SparkSession private( /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with an step value. + * in a range from `start` to `end` (exclusive) with a step value. * * @since 2.0.0 * @group dataset @@ -512,7 +512,7 @@ class SparkSession private( /** * :: Experimental :: * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with an step value, with partition number + * in a range from `start` to `end` (exclusive) with a step value, with partition number * specified. * * @since 2.0.0 http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala index 9dc3679..a8cc72f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala @@ -274,7 +274,7 @@ class SQLBuilder(logicalPlan: LogicalPlan) extends Logging { // 5. the table alias for output columns of generator. // 6. the AS keyword // 7. the column alias, can be more than one, e.g. AS key, value - // An concrete example: "tbl LATERAL VIEW EXPLODE(map_col) sub_q AS key, value", and the builder + // A concrete example: "tbl LATERAL VIEW EXPLODE(map_col) sub_q AS key, value", and the builder // will put it in FROM clause later. build( childSQL, http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala index f392b13..3f7f849 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala @@ -86,7 +86,7 @@ class SortBasedAggregationIterator( // The aggregation buffer used by the sort-based aggregation. private[this] val sortBasedAggregationBuffer: MutableRow = newBuffer - // An SafeProjection to turn UnsafeRow into GenericInternalRow, because UnsafeRow can't be + // A SafeProjection to turn UnsafeRow into GenericInternalRow, because UnsafeRow can't be // compared to MutableRow (aggregation buffer) directly. private[this] val safeProj: Projection = FromUnsafeProjection(valueAttributes.map(_.dataType)) http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 4ceb710..b047bc0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -202,7 +202,7 @@ sealed trait BufferSetterGetterUtils { } /** - * A Mutable [[Row]] representing an mutable aggregation buffer. + * A Mutable [[Row]] representing a mutable aggregation buffer. */ private[sql] class MutableAggregationBufferImpl ( schema: StructType, http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index 1041bab..7a14879 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -58,7 +58,7 @@ class MutableUnsafeRow(val writer: UnsafeRowWriter) extends GenericMutableRow(nu } /** - * Generates bytecode for an [[ColumnarIterator]] for columnar cache. + * Generates bytecode for a [[ColumnarIterator]] for columnar cache. */ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarIterator] with Logging { http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 350508c..7503285 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.SparkPlan * is only done on top level columns, but formats should support pruning of nested columns as * well. * - Construct a reader function by passing filters and the schema into the FileFormat. - * - Using an partition pruning predicates, enumerate the list of files that should be read. + * - Using a partition pruning predicates, enumerate the list of files that should be read. * - Split the files into tasks and construct a FileScanRDD. * - Add any projection or filters that must be evaluated after the scan. * http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala index aeee260..733fcbf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala @@ -50,7 +50,7 @@ object JacksonParser extends Logging { /** * Parse the current token (and related children) according to a desired schema - * This is an wrapper for the method `convertField()` to handle a row wrapped + * This is a wrapper for the method `convertField()` to handle a row wrapped * with an array. */ def convertRootField( http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala index 6bf82be..85b0bc1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala @@ -68,7 +68,7 @@ private[parquet] trait HasParentContainerUpdater { } /** - * A convenient converter class for Parquet group types with an [[HasParentContainerUpdater]]. + * A convenient converter class for Parquet group types with a [[HasParentContainerUpdater]]. */ private[parquet] abstract class CatalystGroupConverter(val updater: ParentContainerUpdater) extends GroupConverter with HasParentContainerUpdater http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala index fb60d68..2ea6ee3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala @@ -47,10 +47,10 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan} * partitions. * * The workflow of this coordinator is described as follows: - * - Before the execution of a [[SparkPlan]], for an [[ShuffleExchange]] operator, + * - Before the execution of a [[SparkPlan]], for a [[ShuffleExchange]] operator, * if an [[ExchangeCoordinator]] is assigned to it, it registers itself to this coordinator. * This happens in the `doPrepare` method. - * - Once we start to execute a physical plan, an [[ShuffleExchange]] registered to this + * - Once we start to execute a physical plan, a [[ShuffleExchange]] registered to this * coordinator will call `postShuffleRDD` to get its corresponding post-shuffle * [[ShuffledRowRDD]]. * If this coordinator has made the decision on how to shuffle data, this [[ShuffleExchange]] @@ -61,7 +61,7 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan} * post-shuffle partitions and pack multiple pre-shuffle partitions with continuous indices * to a single post-shuffle partition whenever necessary. * - Finally, this coordinator will create post-shuffle [[ShuffledRowRDD]]s for all registered - * [[ShuffleExchange]]s. So, when an [[ShuffleExchange]] calls `postShuffleRDD`, this coordinator + * [[ShuffleExchange]]s. So, when a [[ShuffleExchange]] calls `postShuffleRDD`, this coordinator * can lookup the corresponding [[RDD]]. * * The strategy used to determine the number of post-shuffle partitions is described as follows. @@ -98,8 +98,8 @@ private[sql] class ExchangeCoordinator( @volatile private[this] var estimated: Boolean = false /** - * Registers an [[ShuffleExchange]] operator to this coordinator. This method is only allowed to - * be called in the `doPrepare` method of an [[ShuffleExchange]] operator. + * Registers a [[ShuffleExchange]] operator to this coordinator. This method is only allowed to + * be called in the `doPrepare` method of a [[ShuffleExchange]] operator. */ @GuardedBy("this") def registerExchange(exchange: ShuffleExchange): Unit = synchronized { http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index f0efa52..32f0bc5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.util.collection.BitSet /** - * Performs an sort merge join of two child relations. + * Performs a sort merge join of two child relations. */ case class SortMergeJoinExec( leftKeys: Seq[Expression], http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala index dc6f2ef..6c76328 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala @@ -40,7 +40,7 @@ private[sql] case class MapPartitionsRWrapper( val (newIter, deserializer, colNames) = if (!isSerializedRData) { - // Serialize each row into an byte array that can be deserialized in the R worker + // Serialize each row into a byte array that can be deserialized in the R worker (iter.asInstanceOf[Iterator[Row]].map {row => rowToRBytes(row)}, SerializationFormats.ROW, inputSchema.fieldNames) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala index 4892591..eac658c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala @@ -133,7 +133,7 @@ abstract class UserDefinedAggregateFunction extends Serializable { /** * :: Experimental :: - * A [[Row]] representing an mutable aggregation buffer. + * A [[Row]] representing a mutable aggregation buffer. * * This is not meant to be extended outside of Spark. */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 9f6137d..0d6f984 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -52,7 +52,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) { org.apache.spark.util.Utils.getContextOrSparkClassLoader) /** - * Create a SQLListener then add it into SparkContext, and create an SQLTab if there is SparkUI. + * Create a SQLListener then add it into SparkContext, and create a SQLTab if there is SparkUI. */ private def createListenerAndUI(sc: SparkContext): SQLListener = { if (SparkSession.sqlListener.get() == null) { http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala index 451cfd8..3bbb0b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala @@ -93,7 +93,7 @@ trait ContinuousQuery { def awaitTermination(timeoutMs: Long): Boolean /** - * Blocks until all available data in the source has been processed an committed to the sink. + * Blocks until all available data in the source has been processed and committed to the sink. * This method is intended for testing. Note that in the case of continually arriving data, this * method may block forever. Additionally, this method is only guaranteed to block until data that * has been synchronously appended data to a [[org.apache.spark.sql.execution.streaming.Source]] http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala index befdda1..5f89696 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala @@ -218,7 +218,7 @@ private[hive] trait HiveClient { /** Create a function in an existing database. */ def createFunction(db: String, func: CatalogFunction): Unit - /** Drop an existing function an the database. */ + /** Drop an existing function in the database. */ def dropFunction(db: String, name: String): Unit /** Rename an existing function in the database. */ http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala index 0589c8e..f5db73b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.types.StructType private[orc] object OrcFileOperator extends Logging { /** * Retrieves an ORC file reader from a given path. The path can point to either a directory or a - * single ORC file. If it points to an directory, it picks any non-empty ORC file within that + * single ORC file. If it points to a directory, it picks any non-empty ORC file within that * directory. * * The reader returned by this method is mainly used for two purposes: http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 65d53de..a765214 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution} * Allows the creations of tests that execute the same query against both hive * and catalyst, comparing the results. * - * The "golden" results from Hive are cached in an retrieved both from the classpath and + * The "golden" results from Hive are cached in and retrieved both from the classpath and * [[answerCache]] to speed up testing. * * See the documentation of public vals in this class for information on how test execution can be http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/main/scala/org/apache/spark/streaming/State.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index 42424d6..3f560f8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -120,7 +120,7 @@ sealed abstract class State[S] { def isTimingOut(): Boolean /** - * Get the state as an [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`. + * Get the state as a [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`. */ @inline final def getOption(): Option[S] = if (exists) Some(get()) else None http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index 7e78fa1..4c4376a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -349,7 +349,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { } /** - * Create an input stream from an queue of RDDs. In each batch, + * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * * NOTE: @@ -369,7 +369,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { } /** - * Create an input stream from an queue of RDDs. In each batch, + * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * * NOTE: @@ -393,7 +393,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { } /** - * Create an input stream from an queue of RDDs. In each batch, + * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * * NOTE: http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 147e8c1..fa15a0b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -157,7 +157,7 @@ abstract class DStream[T: ClassTag] ( def persist(level: StorageLevel): DStream[T] = { if (this.isInitialized) { throw new UnsupportedOperationException( - "Cannot change storage level of an DStream after streaming context has started") + "Cannot change storage level of a DStream after streaming context has started") } this.storageLevel = level this @@ -176,7 +176,7 @@ abstract class DStream[T: ClassTag] ( def checkpoint(interval: Duration): DStream[T] = { if (isInitialized) { throw new UnsupportedOperationException( - "Cannot change checkpoint interval of an DStream after streaming context has started") + "Cannot change checkpoint interval of a DStream after streaming context has started") } persist() checkpointDuration = interval http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala index 47968af..8c3a797 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala @@ -31,5 +31,5 @@ private[streaming] case class ArrayBufferBlock(arrayBuffer: ArrayBuffer[_]) exte /** class representing a block received as an Iterator */ private[streaming] case class IteratorBlock(iterator: Iterator[_]) extends ReceivedBlock -/** class representing a block received as an ByteBuffer */ +/** class representing a block received as a ByteBuffer */ private[streaming] case class ByteBufferBlock(byteBuffer: ByteBuffer) extends ReceivedBlock http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala index 391a461..4105171 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala @@ -31,7 +31,7 @@ import org.apache.spark.streaming.receiver.Receiver * all receivers at the same time. ReceiverTracker will call `scheduleReceivers` at this phase. * It will try to schedule receivers such that they are evenly distributed. ReceiverTracker * should update its `receiverTrackingInfoMap` according to the results of `scheduleReceivers`. - * `ReceiverTrackingInfo.scheduledLocations` for each receiver should be set to an location list + * `ReceiverTrackingInfo.scheduledLocations` for each receiver should be set to a location list * that contains the scheduled locations. Then when a receiver is starting, it will send a * register request and `ReceiverTracker.registerReceiver` will be called. In * `ReceiverTracker.registerReceiver`, if a receiver's scheduled locations is set, it should http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala ---------------------------------------------------------------------- diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala index bdbac64..bd8f995 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala @@ -71,7 +71,7 @@ trait DStreamCheckpointTester { self: SparkFunSuite => /** * Tests a streaming operation under checkpointing, by restarting the operation * from checkpoint file and verifying whether the final output is correct. - * The output is assumed to have come from a reliable queue which an replay + * The output is assumed to have come from a reliable queue which a replay * data as required. * * NOTE: This takes into consideration that the last batch processed before http://git-wip-us.apache.org/repos/asf/spark/blob/90e94b82/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala ---------------------------------------------------------------------- diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala index a1af95be..1a0460c 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala @@ -119,7 +119,7 @@ class PIDRateEstimatorSuite extends SparkFunSuite with Matchers { test("with no accumulated but some positive error, |I| > 0, follow the processing speed") { val p = new PIDRateEstimator(20, 1D, 1D, 0D, 10) - // prepare a series of batch updates, one every 20ms with an decreasing number of processed + // prepare a series of batch updates, one every 20ms with a decreasing number of processed // elements in each batch, but constant processing time, and no accumulated error. Even though // the integral part is non-zero, the estimated rate should follow only the proportional term, // asking for less and less elements --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org