[spark] branch master updated: [SPARK-28414][WEBUI] UI updates to show resource info in Standalone
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 70f4bbc [SPARK-28414][WEBUI] UI updates to show resource info in Standalone 70f4bbc is described below commit 70f4bbccc511c60266511c752a1fa37b20be3f8d Author: wuyi AuthorDate: Tue Aug 27 08:59:29 2019 -0500 [SPARK-28414][WEBUI] UI updates to show resource info in Standalone ## What changes were proposed in this pull request? Since SPARK-27371 has supported GPU-aware resource scheduling in Standalone, this PR adds resources info in Standalone UI. ## How was this patch tested? Updated `JsonProtocolSuite` and tested manually. Master page: ![masterpage](https://user-images.githubusercontent.com/16397174/62835958-b933c100-bc90-11e9-814f-22bae048303d.png) Worker page ![workerpage](https://user-images.githubusercontent.com/16397174/63417947-d2790200-c434-11e9-8979-36b8f558afd3.png) Application page ![applicationpage](https://user-images.githubusercontent.com/16397174/62835964-cbadfa80-bc90-11e9-99a2-26e05421619a.png) Closes #25409 from Ngone51/SPARK-28414. 
Authored-by: wuyi Signed-off-by: Thomas Graves --- .../org/apache/spark/deploy/DeployMessage.scala| 4 +- .../org/apache/spark/deploy/JsonProtocol.scala | 39 +++- .../spark/deploy/StandaloneResourceUtils.scala | 74 ++ .../org/apache/spark/deploy/master/Master.scala| 4 +- .../apache/spark/deploy/master/WorkerInfo.scala| 29 +++-- .../spark/deploy/master/ui/ApplicationPage.scala | 9 ++- .../apache/spark/deploy/master/ui/MasterPage.scala | 41 ++-- .../org/apache/spark/deploy/worker/Worker.scala| 25 +++- .../apache/spark/deploy/worker/ui/WorkerPage.scala | 28 +++- .../apache/spark/resource/ResourceAllocator.scala | 1 - .../spark/resource/ResourceInformation.scala | 2 + .../org/apache/spark/deploy/DeployTestUtils.scala | 40 ++-- .../apache/spark/deploy/JsonProtocolSuite.scala| 37 +-- 13 files changed, 298 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 3f1d1ae..fba371d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -238,7 +238,9 @@ private[deploy] object DeployMessages { case class WorkerStateResponse(host: String, port: Int, workerId: String, executors: List[ExecutorRunner], finishedExecutors: List[ExecutorRunner], drivers: List[DriverRunner], finishedDrivers: List[DriverRunner], masterUrl: String, -cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) { +cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String, +resources: Map[String, ResourceInformation] = Map.empty, +resourcesUsed: Map[String, ResourceInformation] = Map.empty) { Utils.checkHost(host) assert (port > 0) diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 7212696..6c3276c 100644 --- 
a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -17,15 +17,29 @@ package org.apache.spark.deploy -import org.json4s.JsonAST.JObject +import org.json4s.JsonAST._ import org.json4s.JsonDSL._ import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master._ import org.apache.spark.deploy.master.RecoveryState.MasterState import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.resource.{ResourceInformation, ResourceRequirement} private[deploy] object JsonProtocol { + + private def writeResourcesInfo(info: Map[String, ResourceInformation]): JObject = { +val jsonFields = info.map { + case (k, v) => JField(k, v.toJson) +} +JObject(jsonFields.toList) + } + + private def writeResourceRequirement(req: ResourceRequirement): JObject = { +("name" -> req.resourceName) ~ +("amount" -> req.amount) + } + /** * Export the [[WorkerInfo]] to a Json object. A [[WorkerInfo]] consists of the information of a * worker. @@ -41,6 +55,9 @@ private[deploy] object JsonProtocol { * `memory` total memory of the worker * `memoryused` allocated memory of the worker * `memoryfree` free memory of the worker + * `resources` total resources of the worker + * `resourcesused` allocated resources of the worker + * `resourcesfr
[spark] branch master updated: [SPARK-27371][CORE] Support GPU-aware resources scheduling in Standalone
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new cbad616 [SPARK-27371][CORE] Support GPU-aware resources scheduling in Standalone cbad616 is described below commit cbad616d4cb0c58993a88df14b5e30778c7f7e85 Author: wuyi AuthorDate: Fri Aug 9 07:49:03 2019 -0500 [SPARK-27371][CORE] Support GPU-aware resources scheduling in Standalone ## What changes were proposed in this pull request? In this PR, we implement a complete process of GPU-aware resources scheduling in Standalone. The whole process looks like: Worker sets up isolated resources when it starts up and registers to master along with its resources. And, Master picks up usable workers according to driver/executor's resource requirements to launch driver/executor on them. Then, Worker launches the driver/executor after preparing the resources file, which is created under driver/executor's working directory, with specified resource addresses (told by master). When driver/executor finished, their resources could be recycled to worker. Finally, if a worker stops, it should always release its resources first. For the case of Workers and Drivers in **client** mode run on the same host, we introduce a config option named `spark.resources.coordinate.enable`(default true) to indicate whether Spark should coordinate resources for user. If `spark.resources.coordinate.enable=false`, user should be responsible for configuring different resources for Workers and Drivers when using resourcesFile or discovery script. If true, Spark would help user to assign different resources for Workers and Drivers. The solution for Spark to coordinate resources among Workers and Drivers is: Generally, use a shared file named *allocated_resources.json* to sync allocated resources info among Workers and Drivers on the same host. 
After a Worker or Driver found all resources using the configured resourcesFile and/or discovery script during launching, it should filter out available resources by excluding resources already allocated in *allocated_resources.json* and acquire resources from available resources according to its own requirement. After that, it should write its allocated resources along with its process id (pid) into *allocated_resources.json*. Pid (proposed by tgravescs) is used here to check whether the allocated resources are still valid in case of Worker or Driver cras [...] Note that we'll always get a file lock before any access to file *allocated_resources.json* and release the lock finally. Furthermore, we appended resources info in `WorkerSchedulerStateResponse` to work around master change behaviour in HA mode. ## How was this patch tested? Added unit tests in WorkerSuite, MasterSuite, SparkContextSuite. Manually tested with client/cluster mode (e.g. multiple workers) in a single node Standalone. Closes #25047 from Ngone51/SPARK-27371. 
Authored-by: wuyi Signed-off-by: Thomas Graves --- .gitignore | 1 + .../main/scala/org/apache/spark/SparkContext.scala | 34 +- .../spark/deploy/ApplicationDescription.scala | 5 +- .../scala/org/apache/spark/deploy/Client.scala | 7 +- .../org/apache/spark/deploy/DeployMessage.scala| 28 +- .../apache/spark/deploy/DriverDescription.scala| 5 +- .../apache/spark/deploy/LocalSparkCluster.scala| 3 +- .../spark/deploy/StandaloneResourceUtils.scala | 348 + .../spark/deploy/master/ApplicationInfo.scala | 5 +- .../apache/spark/deploy/master/DriverInfo.scala| 8 + .../apache/spark/deploy/master/ExecutorDesc.scala | 6 +- .../org/apache/spark/deploy/master/Master.scala| 108 +-- .../apache/spark/deploy/master/WorkerInfo.scala| 58 +++- .../spark/deploy/rest/StandaloneRestServer.scala | 6 +- .../apache/spark/deploy/worker/DriverRunner.scala | 14 +- .../spark/deploy/worker/ExecutorRunner.scala | 11 +- .../org/apache/spark/deploy/worker/Worker.scala| 77 +++-- .../executor/CoarseGrainedExecutorBackend.scala| 4 +- .../org/apache/spark/internal/config/package.scala | 17 + .../ResourceAllocator.scala} | 17 +- .../org/apache/spark/resource/ResourceUtils.scala | 42 ++- .../spark/scheduler/ExecutorResourceInfo.scala | 77 + .../apache/spark/scheduler/TaskSchedulerImpl.scala | 7 +- .../cluster/StandaloneSchedulerBackend.scala | 6 +- .../main/scala/org/apache/spark/util/Utils.scala | 42 +++ .../scala/org/apache/spark/SparkConfSuite.scala| 6 +- .../scala/org/apache/spark/SparkContextSuite.scala | 15
[spark] branch master updated: [SPARK-28213][SQL] Replace ColumnarBatchScan with equivilant from Columnar
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8dff711 [SPARK-28213][SQL] Replace ColumnarBatchScan with equivilant from Columnar 8dff711 is described below commit 8dff711ce732d476593a4e235d68e5e1728046cb Author: Robert (Bobby) Evans AuthorDate: Thu Jul 11 09:03:30 2019 -0500 [SPARK-28213][SQL] Replace ColumnarBatchScan with equivilant from Columnar ## What changes were proposed in this pull request? This is a second part of the https://issues.apache.org/jira/browse/SPARK-27396 and a follow on to #24795 ## How was this patch tested? I did some manual tests and ran/updated the automated tests. I did some simple performance tests on a single node to try to verify that there is no performance impact, and I was not able to measure anything beyond noise. Closes #25008 from revans2/columnar-remove-batch-scan. 
Authored-by: Robert (Bobby) Evans Signed-off-by: Thomas Graves --- .../org/apache/spark/sql/execution/Columnar.scala | 10 +- .../spark/sql/execution/ColumnarBatchScan.scala| 167 - .../spark/sql/execution/DataSourceScanExec.scala | 60 .../sql/execution/WholeStageCodegenExec.scala | 6 +- .../execution/adaptive/AdaptiveSparkPlanExec.scala | 2 + .../execution/columnar/InMemoryTableScanExec.scala | 109 +++--- .../execution/datasources/v2/BatchScanExec.scala | 2 +- .../datasources/v2/DataSourceV2ScanExecBase.scala | 34 +++-- .../datasources/v2/MicroBatchScanExec.scala| 2 +- .../org/apache/spark/sql/CachedTableSuite.scala| 2 +- .../scala/org/apache/spark/sql/SubquerySuite.scala | 5 +- .../execution/LogicalPlanTagInSparkPlanSuite.scala | 13 +- .../sql/execution/WholeStageCodegenSuite.scala | 42 -- .../columnar/InMemoryColumnarQuerySuite.scala | 11 +- .../datasources/parquet/ParquetQuerySuite.scala| 4 +- .../sql/execution/metric/SQLMetricsSuite.scala | 6 +- .../org/apache/spark/sql/test/SQLTestUtils.scala | 2 +- 17 files changed, 142 insertions(+), 335 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index 315eba6..4385843 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -53,8 +53,8 @@ class ColumnarRule { * Provides a common executor to translate an [[RDD]] of [[ColumnarBatch]] into an [[RDD]] of * [[InternalRow]]. This is inserted whenever such a transition is determined to be needed. * - * The implementation is based off of similar implementations in [[ColumnarBatchScan]], - * [[org.apache.spark.sql.execution.python.ArrowEvalPythonExec]], and + * The implementation is based off of similar implementations in + * [[org.apache.spark.sql.execution.python.ArrowEvalPythonExec]] and * [[MapPartitionsInRWithArrowExec]]. 
Eventually this should replace those implementations. */ case class ColumnarToRowExec(child: SparkPlan) @@ -96,9 +96,6 @@ case class ColumnarToRowExec(child: SparkPlan) /** * Generate [[ColumnVector]] expressions for our parent to consume as rows. * This is called once per [[ColumnVector]] in the batch. - * - * This code came unchanged from [[ColumnarBatchScan]] and will hopefully replace it - * at some point. */ private def genCodeColumnVector( ctx: CodegenContext, @@ -130,9 +127,6 @@ case class ColumnarToRowExec(child: SparkPlan) * Produce code to process the input iterator as [[ColumnarBatch]]es. * This produces an [[org.apache.spark.sql.catalyst.expressions.UnsafeRow]] for each row in * each batch. - * - * This code came almost completely unchanged from [[ColumnarBatchScan]] and will - * hopefully replace it at some point. */ override protected def doProduce(ctx: CodegenContext): String = { // PhysicalRDD always just has one input diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala deleted file mode 100644 index b2e9f76..000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of t
[spark] branch master updated: [SPARK-27945][SQL] Minimal changes to support columnar processing
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c341de8 [SPARK-27945][SQL] Minimal changes to support columnar processing c341de8 is described below commit c341de8b3e1f1d3327bd4ae3b0d2ec048f64d306 Author: Robert (Bobby) Evans AuthorDate: Fri Jun 28 14:00:12 2019 -0500 [SPARK-27945][SQL] Minimal changes to support columnar processing ## What changes were proposed in this pull request? This is the first part of [SPARK-27396](https://issues.apache.org/jira/browse/SPARK-27396). This is the minimum set of changes necessary to support a pluggable back end for columnar processing. Follow on JIRAs would cover removing some of the duplication between functionality in this patch and functionality currently covered by things like ColumnarBatchScan. ## How was this patch tested? I added in a new unit test to cover new code not really covered in other places. I also did manual testing by implementing two plugins/extensions that take advantage of the new APIs to allow for columnar processing for some simple queries. One version runs on the [CPU](https://gist.github.com/revans2/c3cad77075c4fa5d9d271308ee2f1b1d). The other version runs on a GPU, but because it has unreleased dependencies I will not include a link to it yet. The CPU version I would expect to add in as an example with other documentation in a follow on JIRA. This is contributed on behalf of NVIDIA Corporation. Closes #24795 from revans2/columnar-basic. 
Authored-by: Robert (Bobby) Evans Signed-off-by: Thomas Graves --- .../apache/spark/sql/vectorized/ColumnVector.java | 2 +- .../apache/spark/sql/vectorized/ColumnarBatch.java | 13 +- .../execution/vectorized/WritableColumnVector.java | 5 +- .../apache/spark/sql/SparkSessionExtensions.scala | 19 + .../org/apache/spark/sql/execution/Columnar.scala | 534 + .../spark/sql/execution/ColumnarBatchScan.scala| 2 - .../spark/sql/execution/QueryExecution.scala | 2 + .../org/apache/spark/sql/execution/SparkPlan.scala | 36 ++ .../sql/execution/WholeStageCodegenExec.scala | 97 +++- .../sql/internal/BaseSessionStateBuilder.scala | 9 +- .../apache/spark/sql/internal/SessionState.scala | 3 +- .../spark/sql/SparkSessionExtensionSuite.scala | 409 +++- .../python/BatchEvalPythonExecSuite.scala | 8 +- .../execution/vectorized/ColumnarBatchSuite.scala | 210 +++- 14 files changed, 1311 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java index 14caaea..f18d003 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java @@ -287,7 +287,7 @@ public abstract class ColumnVector implements AutoCloseable { /** * @return child [[ColumnVector]] at the given ordinal. */ - protected abstract ColumnVector getChild(int ordinal); + public abstract ColumnVector getChild(int ordinal); /** * Data type for this column. 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java index 9f917ea..a2feac8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java @@ -31,7 +31,7 @@ import org.apache.spark.unsafe.types.UTF8String; * the entire data loading process. */ @Evolving -public final class ColumnarBatch { +public final class ColumnarBatch implements AutoCloseable { private int numRows; private final ColumnVector[] columns; @@ -42,6 +42,7 @@ public final class ColumnarBatch { * Called to close all the columns in this batch. It is not valid to access the data after * calling this. This must be called at the end to clean up memory allocations. */ + @Override public void close() { for (ColumnVector c: columns) { c.close(); @@ -110,7 +111,17 @@ public final class ColumnarBatch { } public ColumnarBatch(ColumnVector[] columns) { +this(columns, 0); + } + + /** + * Create a new batch from existing column vectors. + * @param columns The columns of this batch + * @param numRows The number of rows in this batch + */ + public ColumnarBatch(ColumnVector[] columns, int numRows) { this.columns = columns; +this.numRows = numRows; this.row = new ColumnarBatchRow(columns); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized
[spark] branch master updated: [SPARK-27760][CORE] Spark resources - change user resource config from .count to .amount
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d30284b [SPARK-27760][CORE] Spark resources - change user resource config from .count to .amount d30284b is described below commit d30284b5a51dd784f663eb4eea37087b35a54d00 Author: Thomas Graves AuthorDate: Thu Jun 6 14:16:05 2019 -0500 [SPARK-27760][CORE] Spark resources - change user resource config from .count to .amount ## What changes were proposed in this pull request? Change the resource config spark.{executor/driver}.resource.{resourceName}.count to .amount to allow future usage of containing both a count and a unit. Right now we only support counts - # of gpus for instance, but in the future we may want to support units for things like memory - 25G. I think making the user only have to specify a single config .amount is better than making them specify 2 separate configs of a .count and then a .unit. Change it now since it's a user-facing config. Amount also matches how the spark on yarn configs are setup. ## How was this patch tested? Unit tests and manually verified on yarn and local cluster mode Closes #24810 from tgravescs/SPARK-27760-amount. 
Authored-by: Thomas Graves Signed-off-by: Thomas Graves --- .../main/scala/org/apache/spark/SparkConf.scala| 4 +-- .../main/scala/org/apache/spark/SparkContext.scala | 12 .../main/scala/org/apache/spark/TestUtils.scala| 2 +- .../executor/CoarseGrainedExecutorBackend.scala| 2 +- .../org/apache/spark/internal/config/package.scala | 2 +- .../org/apache/spark/ResourceDiscovererSuite.scala | 2 +- .../scala/org/apache/spark/SparkConfSuite.scala| 8 ++--- .../scala/org/apache/spark/SparkContextSuite.scala | 24 +++ .../CoarseGrainedExecutorBackendSuite.scala| 26 .../CoarseGrainedSchedulerBackendSuite.scala | 2 +- .../spark/scheduler/TaskSchedulerImplSuite.scala | 4 +-- docs/configuration.md | 14 - .../apache/spark/deploy/k8s/KubernetesUtils.scala | 4 +-- .../k8s/features/BasicDriverFeatureStepSuite.scala | 2 +- .../features/BasicExecutorFeatureStepSuite.scala | 4 +-- .../spark/deploy/yarn/ResourceRequestHelper.scala | 8 ++--- .../spark/deploy/yarn/YarnSparkHadoopUtil.scala| 2 +- .../org/apache/spark/deploy/yarn/ClientSuite.scala | 36 ++ .../spark/deploy/yarn/YarnAllocatorSuite.scala | 4 +-- 19 files changed, 93 insertions(+), 69 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 227f4a5..e231a40 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -512,8 +512,8 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria */ private[spark] def getTaskResourceRequirements(): Map[String, Int] = { getAllWithPrefix(SPARK_TASK_RESOURCE_PREFIX) - .withFilter { case (k, v) => k.endsWith(SPARK_RESOURCE_COUNT_SUFFIX)} - .map { case (k, v) => (k.dropRight(SPARK_RESOURCE_COUNT_SUFFIX.length), v.toInt)}.toMap + .withFilter { case (k, v) => k.endsWith(SPARK_RESOURCE_AMOUNT_SUFFIX)} + .map { case (k, v) => (k.dropRight(SPARK_RESOURCE_AMOUNT_SUFFIX.length), v.toInt)}.toMap } /** diff --git 
a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 66f8f41..c169842 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -391,7 +391,7 @@ class SparkContext(config: SparkConf) extends Logging { } // verify the resources we discovered are what the user requested val driverReqResourcesAndCounts = - SparkConf.getConfigsWithSuffix(allDriverResourceConfs, SPARK_RESOURCE_COUNT_SUFFIX).toMap + SparkConf.getConfigsWithSuffix(allDriverResourceConfs, SPARK_RESOURCE_AMOUNT_SUFFIX).toMap ResourceDiscoverer.checkActualResourcesMeetRequirements(driverReqResourcesAndCounts, _resources) logInfo("===") @@ -2725,7 +2725,7 @@ object SparkContext extends Logging { // executor and resources required by each task. val taskResourcesAndCount = sc.conf.getTaskResourceRequirements() val executorResourcesAndCounts = sc.conf.getAllWithPrefixAndSuffix( -SPARK_EXECUTOR_RESOURCE_PREFIX, SPARK_RESOURCE_COUNT_SUFFIX).toMap +SPARK_EXECUTOR_RESOURCE_PREFIX, SPARK_RESOURCE_AMOUNT_SUFFIX).toMap var numSlots = execCores / taskCores var limit
[spark] branch master updated: [SPARK-27362][K8S] Resource Scheduling support for k8s
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1277f8f [SPARK-27362][K8S] Resource Scheduling support for k8s 1277f8f is described below commit 1277f8fa92da85d9e39d9146e3099fcb75c71a8f Author: Thomas Graves AuthorDate: Fri May 31 15:26:14 2019 -0500 [SPARK-27362][K8S] Resource Scheduling support for k8s ## What changes were proposed in this pull request? Add ability to map the spark resource configs spark.{executor/driver}.resource.{resourceName} to kubernetes Container builder so that we request resources (gpus/fpgas/etc) from kubernetes. Note that the spark configs will overwrite any resource configs users put into a pod template. I added a generic vendor config which is only used by kubernetes right now. I intentionally didn't put it into the kubernetes config namespace just to avoid adding more config prefixes. I will add more documentation for this under jira SPARK-27492. I think it will be easier to do all at once to get a cohesive story. ## How was this patch tested? Unit tests and manual testing on a k8s cluster. Closes #24703 from tgravescs/SPARK-27362. 
Authored-by: Thomas Graves Signed-off-by: Thomas Graves --- .../org/apache/spark/internal/config/package.scala | 1 + docs/configuration.md | 20 docs/running-on-kubernetes.md | 1 + .../apache/spark/deploy/k8s/KubernetesConf.scala | 9 .../apache/spark/deploy/k8s/KubernetesUtils.scala | 29 ++- .../k8s/features/BasicDriverFeatureStep.scala | 4 ++ .../k8s/features/BasicExecutorFeatureStep.scala| 13 +++-- .../k8s/features/BasicDriverFeatureStepSuite.scala | 14 +- .../features/BasicExecutorFeatureStepSuite.scala | 56 +- .../k8s/features/KubernetesFeaturesTestUtils.scala | 2 + 10 files changed, 142 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index a5d36b5..8ea8887 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -37,6 +37,7 @@ package object config { private[spark] val SPARK_RESOURCE_COUNT_SUFFIX = ".count" private[spark] val SPARK_RESOURCE_DISCOVERY_SCRIPT_SUFFIX = ".discoveryScript" + private[spark] val SPARK_RESOURCE_VENDOR_SUFFIX = ".vendor" private[spark] val DRIVER_RESOURCES_FILE = ConfigBuilder("spark.driver.resourcesFile") diff --git a/docs/configuration.md b/docs/configuration.md index 2169951..24e66e1 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -207,6 +207,16 @@ of the most common options to set are: + spark.driver.resource.{resourceName}.vendor + None + +Vendor of the resources to use for the driver. This option is currently +only supported on Kubernetes and is actually both the vendor and domain following +the Kubernetes device plugin naming convention. (e.g. 
For GPUs on Kubernetes +this config would be set to nvidia.com or amd.com) + + + spark.executor.memory 1g @@ -260,6 +270,16 @@ of the most common options to set are: + spark.executor.resource.{resourceName}.vendor + None + +Vendor of the resources to use for the executors. This option is currently +only supported on Kubernetes and is actually both the vendor and domain following +the Kubernetes device plugin naming convention. (e.g. For GPUs on Kubernetes +this config would be set to nvidia.com or amd.com) + + + spark.extraListeners (none) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 8a424b5..d4efb52 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1246,6 +1246,7 @@ The following affect the driver and executor containers. All other containers in The cpu limits are set by spark.kubernetes.{driver,executor}.limit.cores. The cpu is set by spark.{driver,executor}.cores. The memory request and limit are set by summing the values of spark.{driver,executor}.memory and spark.{driver,executor}.memoryOverhead. +Other resource limits are set by spark.{driver,executor}.resources.{resourceName}.* configs. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index 5e74111..a2a4661 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/cor
[spark] branch master updated: [SPARK-27835][CORE] Resource Scheduling: change driver config from addresses
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6748b48 [SPARK-27835][CORE] Resource Scheduling: change driver config from addresses 6748b48 is described below commit 6748b486a9afe8370786efb64a8c9f3470c62dcf Author: Thomas Graves AuthorDate: Thu May 30 07:51:06 2019 -0500 [SPARK-27835][CORE] Resource Scheduling: change driver config from addresses ## What changes were proposed in this pull request? Change the Driver resource discovery argument for standalone mode to be a file rather than separate address configs per resource. This makes it consistent with how the Executor is doing it and makes it more flexible in the future, and it makes for fewer configs if you have multiple resources. ## How was this patch tested? Unit tests and basic manual testing to make sure files were parsed properly. Closes #24730 from tgravescs/SPARK-27835-driver-resourcesFile. 
Authored-by: Thomas Graves Signed-off-by: Thomas Graves --- .../org/apache/spark/ResourceDiscoverer.scala | 20 -- .../main/scala/org/apache/spark/SparkContext.scala | 23 - .../executor/CoarseGrainedExecutorBackend.scala| 24 +- .../org/apache/spark/internal/config/package.scala | 10 - .../scala/org/apache/spark/SparkContextSuite.scala | 22 .../CoarseGrainedExecutorBackendSuite.scala| 4 ++-- 6 files changed, 61 insertions(+), 42 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala b/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala index d3b3860..e5ae202 100644 --- a/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala +++ b/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala @@ -17,11 +17,11 @@ package org.apache.spark -import java.io.File +import java.io.{BufferedInputStream, File, FileInputStream} import com.fasterxml.jackson.core.JsonParseException +import com.fasterxml.jackson.databind.exc.MismatchedInputException import org.json4s.{DefaultFormats, MappingException} -import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging @@ -132,4 +132,20 @@ private[spark] object ResourceDiscoverer extends Logging { } } } + + def parseAllocatedFromJsonFile(resourcesFile: String): Map[String, ResourceInformation] = { +implicit val formats = DefaultFormats +// case class to make json4s parsing easy +case class JsonResourceInformation(val name: String, val addresses: Array[String]) +val resourceInput = new BufferedInputStream(new FileInputStream(resourcesFile)) +val resources = try { + parse(resourceInput).extract[Seq[JsonResourceInformation]] +} catch { + case e@(_: MappingException | _: MismatchedInputException | _: ClassCastException) => +throw new SparkException(s"Exception parsing the resources in $resourcesFile", e) +} finally { + resourceInput.close() +} +resources.map(r => (r.name, new ResourceInformation(r.name, r.addresses))).toMap + } } diff --git 
a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 878010d..6266ce6 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -365,29 +365,24 @@ class SparkContext(config: SparkConf) extends Logging { /** * Checks to see if any resources (GPU/FPGA/etc) are available to the driver by looking - * at and processing the spark.driver.resource.resourceName.addresses and + * at and processing the spark.driver.resourcesFile and * spark.driver.resource.resourceName.discoveryScript configs. The configs have to be * present when the driver starts, setting them after startup does not work. * - * If any resource addresses configs were specified then assume all resources will be specified - * in that way. Otherwise use the discovery scripts to find the resources. Users should - * not really be setting the addresses config directly and should not be mixing methods - * for different types of resources since the addresses config is meant for Standalone mode + * If a resources file was specified then assume all resources will be specified + * in that file. Otherwise use the discovery scripts to find the resources. Users should + * not be setting the resources file config directly and should not be mixing methods + * for different types of resources since the resources file config is meant for Standalone mode * and other cluster managers should use the discovery scripts. */ private def setupDriverResources(): Unit
[spark] branch master updated: [SPARK-27024] Executor interface for cluster managers to support GPU and other resources
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new db2e3c4 [SPARK-27024] Executor interface for cluster managers to support GPU and other resources db2e3c4 is described below commit db2e3c43412e4a7fb4a46c58d73d9ab304a1e949 Author: Thomas Graves AuthorDate: Tue May 14 08:41:41 2019 -0500 [SPARK-27024] Executor interface for cluster managers to support GPU and other resources ## What changes were proposed in this pull request? Add in GPU and generic resource type allocation to the executors. Note this is part of a bigger feature for gpu-aware scheduling and is just how the executor finds the resources. The general flow: - users ask for a certain set of resources, for instance number of gpus - each cluster manager has a specific way to do this. - cluster manager allocates a container or set of resources (standalone mode) - When Spark launches the executor in that container, the executor either has to be told what resources it has or it has to auto discover them. - Executor has to register with Driver and tell the driver the set of resources it has so the scheduler can use that to schedule tasks that require a certain amount of each of those resources In this PR I added configs and arguments to the executor to be able to discover resources. The argument to the executor is intended to be used by standalone mode or other cluster managers that don't have isolation so that it can assign specific resources to specific executors in case there are multiple executors on a node. The argument is a file that contains a JSON array of ResourceInformation objects. The discovery script is meant to be used in an isolated environment where the executor only sees the resources it should use. Note that there will be follow-on PRs to add other parts like the scheduler part.
See the epic high level jira: https://issues.apache.org/jira/browse/SPARK-24615 ## How was this patch tested? Added unit tests and manually tested. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #24406 from tgravescs/gpu-sched-executor-clean. Authored-by: Thomas Graves Signed-off-by: Thomas Graves --- .../org/apache/spark/ResourceDiscoverer.scala | 93 .../org/apache/spark/ResourceInformation.scala | 37 +++ .../executor/CoarseGrainedExecutorBackend.scala| 120 +- .../org/apache/spark/internal/config/package.scala | 7 + .../cluster/CoarseGrainedClusterMessage.scala | 4 +- .../cluster/CoarseGrainedSchedulerBackend.scala| 3 +- .../org/apache/spark/HeartbeatReceiverSuite.scala | 6 +- .../org/apache/spark/ResourceDiscovererSuite.scala | 186 +++ .../deploy/StandaloneDynamicAllocationSuite.scala | 6 +- .../CoarseGrainedExecutorBackendSuite.scala| 262 + .../CoarseGrainedSchedulerBackendSuite.scala | 6 +- docs/configuration.md | 28 +++ .../MesosCoarseGrainedSchedulerBackendSuite.scala | 2 +- .../YarnCoarseGrainedExecutorBackend.scala | 8 +- 14 files changed, 749 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala b/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala new file mode 100644 index 000..1963942 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ResourceDiscoverer.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import java.io.File + +import com.fasterxml.jackson.core.JsonParseException +import org.json4s.{DefaultFormats, MappingException} +import org.json4s.JsonAST.JValue +import org.json4s.jackson.JsonMethods._ + +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.util.Utils.executeAndGetOutput + +/** + * Discovers resources (GPUs/FPGAs/etc). It cur
[spark] branch branch-2.4 updated: [SPARK-26269][YARN][BRANCH-2.4] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new b4202e7 [SPARK-26269][YARN][BRANCH-2.4] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource b4202e7 is described below commit b4202e79833f3adc00afe00f43e8d9165c9c8e48 Author: wuyi AuthorDate: Mon Jan 7 16:22:28 2019 -0600 [SPARK-26269][YARN][BRANCH-2.4] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource ## What changes were proposed in this pull request? As I mentioned in JIRA [SPARK-26269](https://issues.apache.org/jira/browse/SPARK-26269), in order to maximize the use of cluster resources, this PR tries to make `YarnAllocator` have the same blacklist behaviour as YARN. ## How was this patch tested? Added. Closes #23368 from Ngone51/dev-YarnAllocator-should-have-same-blacklist-behaviour-with-YARN-branch-2.4.
Lead-authored-by: wuyi Co-authored-by: Ngone51 Signed-off-by: Thomas Graves --- .../apache/spark/deploy/yarn/YarnAllocator.scala | 31 ++-- .../yarn/YarnAllocatorBlacklistTracker.scala | 4 +- .../yarn/YarnAllocatorBlacklistTrackerSuite.scala | 2 +- .../spark/deploy/yarn/YarnAllocatorSuite.scala | 83 -- 4 files changed, 107 insertions(+), 13 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index f4dc80a..3357084 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -578,13 +578,23 @@ private[yarn] class YarnAllocator( (true, memLimitExceededLogMessage( completedContainer.getDiagnostics, PMEM_EXCEEDED_PATTERN)) - case _ => -// all the failures which not covered above, like: -// disk failure, kill by app master or resource manager, ... -allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) -(true, "Container marked as failed: " + containerId + onHostStr + - ". Exit status: " + completedContainer.getExitStatus + - ". Diagnostics: " + completedContainer.getDiagnostics) + case other_exit_status => +// SPARK-26269: follow YARN's blacklisting behaviour(see https://github +// .com/apache/hadoop/blob/228156cfd1b474988bc4fedfbf7edddc87db41e3/had +// oop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/ap +// ache/hadoop/yarn/util/Apps.java#L273 for details) +if (NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS.contains(other_exit_status)) { + (false, s"Container marked as failed: $containerId$onHostStr" + +s". Exit status: ${completedContainer.getExitStatus}" + +s". 
Diagnostics: ${completedContainer.getDiagnostics}.") +} else { + // completed container from a bad node + allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) + (true, s"Container from a bad node: $containerId$onHostStr" + +s". Exit status: ${completedContainer.getExitStatus}" + +s". Diagnostics: ${completedContainer.getDiagnostics}.") +} + } if (exitCausedByApp) { @@ -722,4 +732,11 @@ private object YarnAllocator { "Consider boosting spark.yarn.executor.memoryOverhead or " + "disabling yarn.nodemanager.vmem-check-enabled because of YARN-4714." } + val NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS = Set( +ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, +ContainerExitStatus.KILLED_BY_APPMASTER, +ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, +ContainerExitStatus.ABORTED, +ContainerExitStatus.DISKS_FAILED + ) } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala index ceac7cd..268976b 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala @@ -120,7 +120,9 @@ private[spark] class YarnAllocatorBlacklistTracker( if (removals.nonEmpty) { logInfo(s"removing nodes from YARN application master's blacklist: $removals&q
[spark] branch master updated: [SPARK-26285][CORE] accumulator metrics sources for LongAccumulator and Doub…
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0a02d5c [SPARK-26285][CORE] accumulator metrics sources for LongAccumulator and Doub… 0a02d5c is described below commit 0a02d5c36fc5035abcfb930e1a229d65c6cf683f Author: Alessandro Bellina AuthorDate: Sat Dec 22 09:03:02 2018 -0600 [SPARK-26285][CORE] accumulator metrics sources for LongAccumulator and Doub… …leAccumulator ## What changes were proposed in this pull request? This PR implements metric sources for LongAccumulator and DoubleAccumulator, such that a user can register these accumulators easily and have their values be reported by the driver's metric namespace. ## How was this patch tested? Unit tests, and manual tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #23242 from abellina/SPARK-26285_accumulator_source. Lead-authored-by: Alessandro Bellina Co-authored-by: Alessandro Bellina Co-authored-by: Alessandro Bellina Signed-off-by: Thomas Graves --- .../spark/metrics/source/AccumulatorSource.scala | 89 + .../metrics/source/AccumulatorSourceSuite.scala| 91 ++ .../spark/examples/AccumulatorMetricsTest.scala| 77 ++ 3 files changed, 257 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala b/core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala new file mode 100644 index 000..45a4d22 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/source/AccumulatorSource.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.source + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.SparkContext +import org.apache.spark.annotation.Experimental +import org.apache.spark.util.{AccumulatorV2, DoubleAccumulator, LongAccumulator} + +/** + * AccumulatorSource is a Spark metric Source that reports the current value + * of the accumulator as a gauge. + * + * It is restricted to the LongAccumulator and the DoubleAccumulator, as those + * are the current built-in numerical accumulators with Spark, and excludes + * the CollectionAccumulator, as that is a List of values (hard to report, + * to a metrics system) + */ +private[spark] class AccumulatorSource extends Source { + private val registry = new MetricRegistry + protected def register[T](accumulators: Map[String, AccumulatorV2[_, T]]): Unit = { +accumulators.foreach { + case (name, accumulator) => +val gauge = new Gauge[T] { + override def getValue: T = accumulator.value +} +registry.register(MetricRegistry.name(name), gauge) +} + } + + override def sourceName: String = "AccumulatorSource" + override def metricRegistry: MetricRegistry = registry +} + +@Experimental +class LongAccumulatorSource extends AccumulatorSource + +@Experimental +class DoubleAccumulatorSource extends AccumulatorSource + +/** + * :: Experimental :: + * Metrics source specifically for LongAccumulators. 
Accumulators + * are only valid on the driver side, so these metrics are reported + * only by the driver. + * Register LongAccumulators using: + *LongAccumulatorSource.register(sc, {"name" -> longAccumulator}) + */ +@Experimental +object LongAccumulatorSource { + def register(sc: SparkContext, accumulators: Map[String, LongAccumulator]): Unit = { +val source = new LongAccumulatorSource +source.register(accumulators) +sc.env.metricsSystem.registerSource(source) + } +} + +/** + * :: Experimental :: + * Metrics source specifically for DoubleAccumulators. Accumulators + * are only valid on the driver side, so these metrics are reported + * only by the driver. + * Register DoubleAccumulators using: + *DoubleAccumulatorSource.register(sc, {"name" -> doubleAccumulator}) + */ +@Experi
[spark] branch master updated: [SPARK-26269][YARN] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource
This is an automated email from the ASF dual-hosted git repository. tgraves pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d6a5f85 [SPARK-26269][YARN] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource d6a5f85 is described below commit d6a5f859848bbd237e19075dd26e1547fb3af417 Author: wuyi AuthorDate: Fri Dec 21 13:21:58 2018 -0600 [SPARK-26269][YARN] Yarnallocator should have same blacklist behaviour with yarn to maxmize use of cluster resource ## What changes were proposed in this pull request? As I mentioned in jira [SPARK-26269](https://issues.apache.org/jira/browse/SPARK-26269), in order to maxmize the use of cluster resource, this pr try to make `YarnAllocator` have the same blacklist behaviour with YARN. ## How was this patch tested? Added. Closes #23223 from Ngone51/dev-YarnAllocator-should-have-same-blacklist-behaviour-with-YARN. Lead-authored-by: wuyi Co-authored-by: Ngone51 Signed-off-by: Thomas Graves --- .../apache/spark/deploy/yarn/YarnAllocator.scala | 32 +++-- .../yarn/YarnAllocatorBlacklistTracker.scala | 4 +- .../yarn/YarnAllocatorBlacklistTrackerSuite.scala | 2 +- .../spark/deploy/yarn/YarnAllocatorSuite.scala | 75 +- 4 files changed, 101 insertions(+), 12 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 54b1ec2..a3feca5 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -607,13 +607,23 @@ private[yarn] class YarnAllocator( val message = "Container killed by YARN for exceeding physical memory limits. " + s"$diag Consider boosting ${EXECUTOR_MEMORY_OVERHEAD.key}." 
(true, message) - case _ => -// all the failures which not covered above, like: -// disk failure, kill by app master or resource manager, ... -allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) -(true, "Container marked as failed: " + containerId + onHostStr + - ". Exit status: " + completedContainer.getExitStatus + - ". Diagnostics: " + completedContainer.getDiagnostics) + case other_exit_status => +// SPARK-26269: follow YARN's blacklisting behaviour(see https://github +// .com/apache/hadoop/blob/228156cfd1b474988bc4fedfbf7edddc87db41e3/had +// oop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/ap +// ache/hadoop/yarn/util/Apps.java#L273 for details) +if (NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS.contains(other_exit_status)) { + (false, s"Container marked as failed: $containerId$onHostStr" + +s". Exit status: ${completedContainer.getExitStatus}" + +s". Diagnostics: ${completedContainer.getDiagnostics}.") +} else { + // completed container from a bad node + allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) + (true, s"Container from a bad node: $containerId$onHostStr" + +s". Exit status: ${completedContainer.getExitStatus}" + +s". 
Diagnostics: ${completedContainer.getDiagnostics}.") +} + } if (exitCausedByApp) { @@ -739,4 +749,12 @@ private object YarnAllocator { val MEM_REGEX = "[0-9.]+ [KMG]B" val VMEM_EXCEEDED_EXIT_CODE = -103 val PMEM_EXCEEDED_EXIT_CODE = -104 + + val NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS = Set( +ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, +ContainerExitStatus.KILLED_BY_APPMASTER, +ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, +ContainerExitStatus.ABORTED, +ContainerExitStatus.DISKS_FAILED + ) } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala index ceac7cd..268976b 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala @@ -120,7 +120,9 @@ private[spark] class YarnAllocatorBlacklistTracker( if (removals.nonEmpty) { logInfo(s"removing nodes from YARN application master's blacklist: $
spark git commit: [SPARK-26201] Fix python broadcast with encryption
Repository: spark Updated Branches: refs/heads/branch-2.4 4661ac76a -> b68decf19 [SPARK-26201] Fix python broadcast with encryption ## What changes were proposed in this pull request? With RPC and disk encryption enabled, a Python job that creates a broadcast variable and just reads the value back on the driver side fails with: Traceback (most recent call last): File "broadcast.py", line 37, in <module> words_new.value File "/pyspark.zip/pyspark/broadcast.py", line 137, in value File "pyspark.zip/pyspark/broadcast.py", line 122, in load_from_path File "pyspark.zip/pyspark/broadcast.py", line 128, in load EOFError: Ran out of input To reproduce use configs: --conf spark.network.crypto.enabled=true --conf spark.io.encryption.enabled=true Code: words_new = sc.broadcast(["scala", "java", "hadoop", "spark", "akka"]) words_new.value print(words_new.value) ## How was this patch tested? words_new = sc.broadcast(["scala", "java", "hadoop", "spark", "akka"]) textFile = sc.textFile("README.md") wordCounts = textFile.flatMap(lambda line: line.split()).map(lambda word: (word + words_new.value[1], 1)).reduceByKey(lambda a, b: a+b) count = wordCounts.count() print(count) words_new.value print(words_new.value) Closes #23166 from redsanket/SPARK-26201.
Authored-by: schintap Signed-off-by: Thomas Graves (cherry picked from commit 9b23be2e95fec756066ca0ed3188c3db2602b757) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b68decf1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b68decf1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b68decf1 Branch: refs/heads/branch-2.4 Commit: b68decf190e402e3d29fa05726b16bd57fe1b078 Parents: 4661ac7 Author: schintap Authored: Fri Nov 30 12:48:56 2018 -0600 Committer: Thomas Graves Committed: Fri Nov 30 12:49:17 2018 -0600 -- .../org/apache/spark/api/python/PythonRDD.scala | 29 +--- python/pyspark/broadcast.py | 21 ++ python/pyspark/test_broadcast.py| 15 ++ 3 files changed, 56 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b68decf1/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 8b5a7a9..5ed5070 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -660,6 +660,7 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial with Logging { private var encryptionServer: PythonServer[Unit] = null + private var decryptionServer: PythonServer[Unit] = null /** * Read data from disks, then copy it to `out` @@ -708,16 +709,36 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial override def handleConnection(sock: Socket): Unit = { val env = SparkEnv.get val in = sock.getInputStream() -val dir = new File(Utils.getLocalDir(env.conf)) -val file = File.createTempFile("broadcast", "", dir) -path = file.getAbsolutePath -val out = env.serializerManager.wrapForEncryption(new FileOutputStream(path)) +val abspath = new 
File(path).getAbsolutePath +val out = env.serializerManager.wrapForEncryption(new FileOutputStream(abspath)) DechunkedInputStream.dechunkAndCopyToOutput(in, out) } } Array(encryptionServer.port, encryptionServer.secret) } + def setupDecryptionServer(): Array[Any] = { +decryptionServer = new PythonServer[Unit]("broadcast-decrypt-server-for-driver") { + override def handleConnection(sock: Socket): Unit = { +val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream())) +Utils.tryWithSafeFinally { + val in = SparkEnv.get.serializerManager.wrapForEncryption(new FileInputStream(path)) + Utils.tryWithSafeFinally { +Utils.copyStream(in, out, false) + } { +in.close() + } + out.flush() +} { + JavaUtils.closeQuietly(out) +} + } +} +Array(decryptionServer.port, decryptionServer.secret) + } + + def waitTillBroadcastDataSent(): Unit = decryptionServer.getResult() + def waitTillDataReceived(): Unit = encryptionServer.getResult() } // scalastyle:on no.finalize http://git-wip-us.apache.org/repos/asf/spark/blob/b68decf1/python/pyspark/broadcast.py -- diff --git
spark git commit: [SPARK-26201] Fix python broadcast with encryption
Repository: spark Updated Branches: refs/heads/master c3f27b243 -> 9b23be2e9 [SPARK-26201] Fix python broadcast with encryption ## What changes were proposed in this pull request? With RPC and disk encryption enabled, a Python job that creates a broadcast variable and just reads the value back on the driver side fails with: Traceback (most recent call last): File "broadcast.py", line 37, in <module> words_new.value File "/pyspark.zip/pyspark/broadcast.py", line 137, in value File "pyspark.zip/pyspark/broadcast.py", line 122, in load_from_path File "pyspark.zip/pyspark/broadcast.py", line 128, in load EOFError: Ran out of input To reproduce use configs: --conf spark.network.crypto.enabled=true --conf spark.io.encryption.enabled=true Code: words_new = sc.broadcast(["scala", "java", "hadoop", "spark", "akka"]) words_new.value print(words_new.value) ## How was this patch tested? words_new = sc.broadcast(["scala", "java", "hadoop", "spark", "akka"]) textFile = sc.textFile("README.md") wordCounts = textFile.flatMap(lambda line: line.split()).map(lambda word: (word + words_new.value[1], 1)).reduceByKey(lambda a, b: a+b) count = wordCounts.count() print(count) words_new.value print(words_new.value) Closes #23166 from redsanket/SPARK-26201.
Authored-by: schintap Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b23be2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b23be2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b23be2e Branch: refs/heads/master Commit: 9b23be2e95fec756066ca0ed3188c3db2602b757 Parents: c3f27b2 Author: schintap Authored: Fri Nov 30 12:48:56 2018 -0600 Committer: Thomas Graves Committed: Fri Nov 30 12:48:56 2018 -0600 -- .../org/apache/spark/api/python/PythonRDD.scala | 29 +--- python/pyspark/broadcast.py | 21 ++ python/pyspark/tests/test_broadcast.py | 15 ++ 3 files changed, 56 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b23be2e/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 8b5a7a9..5ed5070 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -660,6 +660,7 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial with Logging { private var encryptionServer: PythonServer[Unit] = null + private var decryptionServer: PythonServer[Unit] = null /** * Read data from disks, then copy it to `out` @@ -708,16 +709,36 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial override def handleConnection(sock: Socket): Unit = { val env = SparkEnv.get val in = sock.getInputStream() -val dir = new File(Utils.getLocalDir(env.conf)) -val file = File.createTempFile("broadcast", "", dir) -path = file.getAbsolutePath -val out = env.serializerManager.wrapForEncryption(new FileOutputStream(path)) +val abspath = new File(path).getAbsolutePath +val out = env.serializerManager.wrapForEncryption(new FileOutputStream(abspath)) 
DechunkedInputStream.dechunkAndCopyToOutput(in, out) } } Array(encryptionServer.port, encryptionServer.secret) } + def setupDecryptionServer(): Array[Any] = { +decryptionServer = new PythonServer[Unit]("broadcast-decrypt-server-for-driver") { + override def handleConnection(sock: Socket): Unit = { +val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream())) +Utils.tryWithSafeFinally { + val in = SparkEnv.get.serializerManager.wrapForEncryption(new FileInputStream(path)) + Utils.tryWithSafeFinally { +Utils.copyStream(in, out, false) + } { +in.close() + } + out.flush() +} { + JavaUtils.closeQuietly(out) +} + } +} +Array(decryptionServer.port, decryptionServer.secret) + } + + def waitTillBroadcastDataSent(): Unit = decryptionServer.getResult() + def waitTillDataReceived(): Unit = encryptionServer.getResult() } // scalastyle:on no.finalize http://git-wip-us.apache.org/repos/asf/spark/blob/9b23be2e/python/pyspark/broadcast.py -- diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 1c7f2a7..29358b5 100644 ---
[1/3] spark git commit: [SPARK-21809] Change Stage Page to use datatables to support sorting columns and searching
Repository: spark Updated Branches: refs/heads/master 3df307aa5 -> 76ef02e49 http://git-wip-us.apache.org/repos/asf/spark/blob/76ef02e4/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json -- diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json index a15ee23..f859ab6 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json @@ -43,7 +43,10 @@ "writeTime" : 3842811, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 53, + "gettingResultTime" : 0 }, { "taskId" : 1, "index" : 1, @@ -89,7 +92,10 @@ "writeTime" : 3934399, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 40, + "gettingResultTime" : 0 }, { "taskId" : 2, "index" : 2, @@ -135,7 +141,10 @@ "writeTime" : 89885, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 37, + "gettingResultTime" : 0 }, { "taskId" : 3, "index" : 3, @@ -181,7 +190,10 @@ "writeTime" : 1311694, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 41, + "gettingResultTime" : 0 }, { "taskId" : 4, "index" : 4, @@ -227,7 +239,10 @@ "writeTime" : 83022, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 38, + "gettingResultTime" : 0 }, { "taskId" : 5, "index" : 5, @@ -273,7 +288,10 @@ "writeTime" : 3675510, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 33, + "gettingResultTime" : 0 }, { "taskId" : 6, "index" : 6, @@ -319,7 +337,10 @@ "writeTime" : 4016617, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 38, + "gettingResultTime" : 0 }, { "taskId" : 7, "index" : 7, @@ -365,7 +386,10 @@ "writeTime" : 2579051, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + 
"schedulerDelay" : 43, + "gettingResultTime" : 0 }, { "taskId" : 8, "index" : 8, @@ -411,7 +435,10 @@ "writeTime" : 121551, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 5, + "gettingResultTime" : 0 }, { "taskId" : 9, "index" : 9, @@ -457,7 +484,10 @@ "writeTime" : 101664, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 8, + "gettingResultTime" : 0 }, { "taskId" : 10, "index" : 10, @@ -503,7 +533,10 @@ "writeTime" : 94709, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 18, + "gettingResultTime" : 0 }, { "taskId" : 11, "index" : 11, @@ -549,7 +582,10 @@ "writeTime" : 94507, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 8, + "gettingResultTime" : 0 }, { "taskId" : 12, "index" : 12, @@ -595,7 +631,10 @@ "writeTime" : 102476, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 7, + "gettingResultTime" : 0 }, { "taskId" : 13, "index" : 13, @@ -641,7 +680,10 @@ "writeTime" : 95004, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 53, + "gettingResultTime" : 0 }, { "taskId" : 14, "index" : 14, @@ -687,7 +729,10 @@ "writeTime" : 95646, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 5, + "gettingResultTime" : 0 }, { "taskId" : 15, "index" : 15, @@ -733,7 +778,10 @@ "writeTime" : 602780, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 }, { "taskId" : 16, "index" : 16, @@ -779,7 +827,10 @@ "writeTime" : 108320, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 }, { "taskId" : 17, "index" : 17, @@ -825,7 +876,10 @@ "writeTime" : 99944, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 20, + "gettingResultTime" : 0 }, { "taskId" : 18, "index" : 18, @@ -871,7 +925,10 @@ "writeTime" : 100836, "recordsWritten" : 10 } - } + }, + 
"executorLogs" : { }, + "schedulerDelay" : 3, + "gettingResultTime" : 0 }, { "taskId" : 19, "index" : 19, @@ -917,5 +974,8 @@ "writeTime" : 95788, "recordsWritten" : 10 } - } + }, + "executorLogs" : { }, + "schedulerDelay" : 5, + "gettingResultTime" : 0 } ]
[3/3] spark git commit: [SPARK-21809] Change Stage Page to use datatables to support sorting columns and searching
[SPARK-21809] Change Stage Page to use datatables to support sorting columns and searching Support column sort, pagination and search for Stage Page using jQuery DataTable and REST API. Before this commit, the Stage page generated a hard-coded HTML table that could not support search. Supporting search and sort (over all applications rather than the 20 entries in the current page) in any case will greatly improve the user experience. Created the stagespage-template.html for displaying application information in DataTables. Added REST API endpoint and JavaScript code to fetch data from the endpoint and display it on the data table. Because of the above change, certain functionalities in the page had to be modified to support the addition of DataTables. For example, the toggle checkbox 'Select All' previously would add the checked fields as columns in the Task table and as rows in the Summary Metrics table, but after the change, only columns are added in the Task Table as it got tricky to add rows dynamically in the DataTables. ## How was this patch tested? I have attached the screenshots of the Stage Page UI before and after the fix. **Before:** https://user-images.githubusercontent.com/8190/42137915-52054558-7d3a-11e8-8c85-433b2c94161d.png https://user-images.githubusercontent.com/8190/42137928-79df500a-7d3a-11e8-9068-5630afe46ff3.png **After:** https://user-images.githubusercontent.com/8190/42137936-a3fb9f42-7d3a-11e8-8502-22b3897cbf64.png https://user-images.githubusercontent.com/8190/42137970-0fabc58c-7d3b-11e8-95ad-383b1bd1f106.png Closes #21688 from pgandhi999/SPARK-21809-2.3.
Authored-by: pgandhi Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/76ef02e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/76ef02e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/76ef02e4 Branch: refs/heads/master Commit: 76ef02e499db49c0c6a37fa9dff3d731aeac9898 Parents: 3df307a Author: pgandhi Authored: Mon Nov 26 14:08:32 2018 -0600 Committer: Thomas Graves Committed: Mon Nov 26 14:08:32 2018 -0600 -- .../spark/ui/static/executorspage-template.html |8 +- .../org/apache/spark/ui/static/executorspage.js | 84 +- .../apache/spark/ui/static/images/sort_asc.png | Bin 0 -> 160 bytes .../ui/static/images/sort_asc_disabled.png | Bin 0 -> 148 bytes .../apache/spark/ui/static/images/sort_both.png | Bin 0 -> 201 bytes .../apache/spark/ui/static/images/sort_desc.png | Bin 0 -> 158 bytes .../ui/static/images/sort_desc_disabled.png | Bin 0 -> 146 bytes .../org/apache/spark/ui/static/stagepage.js | 958 + .../spark/ui/static/stagespage-template.html| 124 ++ .../org/apache/spark/ui/static/utils.js | 113 +- .../apache/spark/ui/static/webui-dataTables.css | 20 + .../org/apache/spark/ui/static/webui.css| 101 ++ .../apache/spark/status/AppStatusStore.scala| 26 +- .../spark/status/api/v1/StagesResource.scala| 121 +- .../org/apache/spark/status/api/v1/api.scala|5 +- .../org/apache/spark/status/storeTypes.scala|5 +- .../scala/org/apache/spark/ui/UIUtils.scala |2 + .../apache/spark/ui/jobs/ExecutorTable.scala| 149 -- .../org/apache/spark/ui/jobs/StagePage.scala| 325 + .../blacklisting_for_stage_expectation.json | 1287 +- ...blacklisting_node_for_stage_expectation.json | 112 +- .../one_stage_attempt_json_expectation.json | 40 +- .../one_stage_json_expectation.json | 40 +- .../stage_task_list_expectation.json| 100 +- ...m_multi_attempt_app_json_1__expectation.json | 40 +- ...m_multi_attempt_app_json_2__expectation.json | 40 +- 
...ask_list_w__offset___length_expectation.json | 250 +++- .../stage_task_list_w__sortBy_expectation.json | 100 +- ...ortBy_short_names___runtime_expectation.json | 100 +- ...sortBy_short_names__runtime_expectation.json | 100 +- ...stage_with_accumulable_json_expectation.json | 150 +- .../spark/status/AppStatusUtilsSuite.scala | 10 +- .../org/apache/spark/ui/StagePageSuite.scala| 12 - 33 files changed, 3064 insertions(+), 1358 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/76ef02e4/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html index 5c91304..f2c17ae 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html +++
[2/3] spark git commit: [SPARK-21809] Change Stage Page to use datatables to support sorting columns and searching
http://git-wip-us.apache.org/repos/asf/spark/blob/76ef02e4/core/src/main/scala/org/apache/spark/status/api/v1/api.scala -- diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index 30afd8b..aa21da2 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -253,7 +253,10 @@ class TaskData private[spark]( val speculative: Boolean, val accumulatorUpdates: Seq[AccumulableInfo], val errorMessage: Option[String] = None, -val taskMetrics: Option[TaskMetrics] = None) +val taskMetrics: Option[TaskMetrics] = None, +val executorLogs: Map[String, String], +val schedulerDelay: Long, +val gettingResultTime: Long) class TaskMetrics private[spark]( val executorDeserializeTime: Long, http://git-wip-us.apache.org/repos/asf/spark/blob/76ef02e4/core/src/main/scala/org/apache/spark/status/storeTypes.scala -- diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala index 646cf25..ef19e86 100644 --- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala +++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala @@ -283,7 +283,10 @@ private[spark] class TaskDataWrapper( speculative, accumulatorUpdates, errorMessage, - metrics) + metrics, + executorLogs = null, + schedulerDelay = 0L, + gettingResultTime = 0L) } @JsonIgnore @KVIndex(TaskIndexNames.STAGE) http://git-wip-us.apache.org/repos/asf/spark/blob/76ef02e4/core/src/main/scala/org/apache/spark/ui/UIUtils.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 3aed464..60a9293 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -204,6 +204,8 @@ private[spark] object UIUtils extends Logging { 
href={prependBaseUri(request, "/static/dataTables.bootstrap.css")} type="text/css"/> + http://git-wip-us.apache.org/repos/asf/spark/blob/76ef02e4/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala deleted file mode 100644 index 1be81e5..000 --- a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.ui.jobs - -import scala.xml.{Node, Unparsed} - -import org.apache.spark.status.AppStatusStore -import org.apache.spark.status.api.v1.StageData -import org.apache.spark.ui.{ToolTips, UIUtils} -import org.apache.spark.util.Utils - -/** Stage summary grouped by executors. 
*/ -private[ui] class ExecutorTable(stage: StageData, store: AppStatusStore) { - - import ApiHelper._ - - def toNodeSeq: Seq[Node] = { - - -Executor ID -Address -Task Time -Total Tasks -Failed Tasks -Killed Tasks -Succeeded Tasks -{if (hasInput(stage)) { - -Input Size / Records - -}} -{if (hasOutput(stage)) { - -Output Size / Records - -}} -{if (hasShuffleRead(stage)) { - - -Shuffle Read Size / Records - -}} -{if (hasShuffleWrite(stage)) { - - -Shuffle Write Size / Records - -}} -{if (hasBytesSpilled(stage)) { - Shuffle Spill (Memory) - Shuffle Spill (Disk) -}} - - - Blacklisted - - -Logs - -
spark git commit: [SPARK-22148][SPARK-15815][SCHEDULER] Acquire new executors to avoid hang because of blacklisting
Repository: spark Updated Branches: refs/heads/branch-2.4 f98c0ad02 -> 52e9711d0 [SPARK-22148][SPARK-15815][SCHEDULER] Acquire new executors to avoid hang because of blacklisting ## What changes were proposed in this pull request? Every time a task is unschedulable because of the condition where no. of task failures < no. of executors available, we currently abort the taskSet - failing the job. This change tries to acquire new executors so that we can complete the job successfully. We try to acquire a new executor only when we can kill an existing idle executor. If we cannot find an idle executor, we fall back to the older implementation, where we abort the job. ## How was this patch tested? I performed some manual tests to check and validate the behavior. ```scala val rdd = sc.parallelize(Seq(1 to 10), 3) import org.apache.spark.TaskContext val mapped = rdd.mapPartitionsWithIndex ( (index, iterator) => { if (index == 2) { Thread.sleep(30 * 1000); val attemptNum = TaskContext.get.attemptNumber; if (attemptNum < 3) throw new Exception("Fail for blacklisting")}; iterator.toList.map (x => x + " -> " + index).iterator } ) mapped.collect ``` Closes #22288 from dhruve/bug/SPARK-22148. 
Lead-authored-by: Dhruve Ashar Co-authored-by: Dhruve Ashar Co-authored-by: Tom Graves Signed-off-by: Thomas Graves (cherry picked from commit fdd3bace1da01e5958fe0345c38e889e740ce25e) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/52e9711d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/52e9711d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/52e9711d Branch: refs/heads/branch-2.4 Commit: 52e9711d01694158ecb3691f2ec25c0ebe4b0207 Parents: f98c0ad Author: Dhruve Ashar Authored: Tue Nov 6 08:25:32 2018 -0600 Committer: Thomas Graves Committed: Tue Nov 6 08:25:59 2018 -0600 -- .../apache/spark/internal/config/package.scala | 8 + .../spark/scheduler/BlacklistTracker.scala | 30 ++- .../spark/scheduler/TaskSchedulerImpl.scala | 71 ++- .../apache/spark/scheduler/TaskSetManager.scala | 41 ++-- .../scheduler/BlacklistIntegrationSuite.scala | 7 +- .../scheduler/TaskSchedulerImplSuite.scala | 189 ++- docs/configuration.md | 8 + 7 files changed, 318 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/52e9711d/core/src/main/scala/org/apache/spark/internal/config/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index e723819..5836d27 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -592,6 +592,14 @@ package object config { .checkValue(v => v > 0, "The value should be a positive time value.") .createWithDefaultString("365d") + private[spark] val UNSCHEDULABLE_TASKSET_TIMEOUT = +ConfigBuilder("spark.scheduler.blacklist.unschedulableTaskSetTimeout") + .doc("The timeout in seconds to wait to acquire a new executor and schedule a task " + +"before aborting a TaskSet which is unschedulable because of being completely 
blacklisted.") + .timeConf(TimeUnit.SECONDS) + .checkValue(v => v >= 0, "The value should be a non negative time value.") + .createWithDefault(120) + private[spark] val BARRIER_MAX_CONCURRENT_TASKS_CHECK_INTERVAL = ConfigBuilder("spark.scheduler.barrier.maxConcurrentTasksCheck.interval") .doc("Time in seconds to wait between a max concurrent tasks check failure and the next " + http://git-wip-us.apache.org/repos/asf/spark/blob/52e9711d/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala index 980fbbe..ef6d02d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala @@ -146,21 +146,31 @@ private[scheduler] class BlacklistTracker ( nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry) } + private def killExecutor(exec: String, msg: String): Unit = { +allocationClient match { + case Some(a) => +logInfo(msg) +a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false, + force = true) + case None => +logInfo(s"Not attempting to kill blacklisted executor id $exec " + +
spark git commit: [SPARK-22148][SPARK-15815][SCHEDULER] Acquire new executors to avoid hang because of blacklisting
Repository: spark Updated Branches: refs/heads/master 3ed91c9b8 -> fdd3bace1 [SPARK-22148][SPARK-15815][SCHEDULER] Acquire new executors to avoid hang because of blacklisting ## What changes were proposed in this pull request? Every time a task is unschedulable because of the condition where no. of task failures < no. of executors available, we currently abort the taskSet - failing the job. This change tries to acquire new executors so that we can complete the job successfully. We try to acquire a new executor only when we can kill an existing idle executor. If we cannot find an idle executor, we fall back to the older implementation, where we abort the job. ## How was this patch tested? I performed some manual tests to check and validate the behavior. ```scala val rdd = sc.parallelize(Seq(1 to 10), 3) import org.apache.spark.TaskContext val mapped = rdd.mapPartitionsWithIndex ( (index, iterator) => { if (index == 2) { Thread.sleep(30 * 1000); val attemptNum = TaskContext.get.attemptNumber; if (attemptNum < 3) throw new Exception("Fail for blacklisting")}; iterator.toList.map (x => x + " -> " + index).iterator } ) mapped.collect ``` Closes #22288 from dhruve/bug/SPARK-22148. 
Lead-authored-by: Dhruve Ashar Co-authored-by: Dhruve Ashar Co-authored-by: Tom Graves Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fdd3bace Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fdd3bace Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fdd3bace Branch: refs/heads/master Commit: fdd3bace1da01e5958fe0345c38e889e740ce25e Parents: 3ed91c9 Author: Dhruve Ashar Authored: Tue Nov 6 08:25:32 2018 -0600 Committer: Thomas Graves Committed: Tue Nov 6 08:25:32 2018 -0600 -- .../apache/spark/internal/config/package.scala | 8 + .../spark/scheduler/BlacklistTracker.scala | 30 ++- .../spark/scheduler/TaskSchedulerImpl.scala | 71 ++- .../apache/spark/scheduler/TaskSetManager.scala | 41 ++-- .../scheduler/BlacklistIntegrationSuite.scala | 7 +- .../scheduler/TaskSchedulerImplSuite.scala | 189 ++- docs/configuration.md | 8 + 7 files changed, 318 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fdd3bace/core/src/main/scala/org/apache/spark/internal/config/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index c8993e1..2b3ba3c 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -622,6 +622,14 @@ package object config { .checkValue(v => v > 0, "The value should be a positive time value.") .createWithDefaultString("365d") + private[spark] val UNSCHEDULABLE_TASKSET_TIMEOUT = +ConfigBuilder("spark.scheduler.blacklist.unschedulableTaskSetTimeout") + .doc("The timeout in seconds to wait to acquire a new executor and schedule a task " + +"before aborting a TaskSet which is unschedulable because of being completely blacklisted.") + .timeConf(TimeUnit.SECONDS) + .checkValue(v => v >= 0, "The value should be a non 
negative time value.") + .createWithDefault(120) + private[spark] val BARRIER_MAX_CONCURRENT_TASKS_CHECK_INTERVAL = ConfigBuilder("spark.scheduler.barrier.maxConcurrentTasksCheck.interval") .doc("Time in seconds to wait between a max concurrent tasks check failure and the next " + http://git-wip-us.apache.org/repos/asf/spark/blob/fdd3bace/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala index 980fbbe..ef6d02d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala @@ -146,21 +146,31 @@ private[scheduler] class BlacklistTracker ( nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry) } + private def killExecutor(exec: String, msg: String): Unit = { +allocationClient match { + case Some(a) => +logInfo(msg) +a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false, + force = true) + case None => +logInfo(s"Not attempting to kill blacklisted executor id $exec " + + s"since allocation client is not defined.") +} + } + private def killBlacklistedExecutor(exec:
spark git commit: [SPARK-25023] Clarify Spark security documentation
Repository: spark Updated Branches: refs/heads/branch-2.4 8c508da2a -> ea11d1142 [SPARK-25023] Clarify Spark security documentation ## What changes were proposed in this pull request? Clarify documentation about security. ## How was this patch tested? None, just documentation Closes #22852 from tgravescs/SPARK-25023. Authored-by: Thomas Graves Signed-off-by: Thomas Graves (cherry picked from commit c00186f90cfcc33492d760f874ead34f0e3da6ed) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea11d114 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea11d114 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea11d114 Branch: refs/heads/branch-2.4 Commit: ea11d114264560638129eac1db3aa1dc12a206a2 Parents: 8c508da Author: Thomas Graves Authored: Fri Nov 2 10:56:30 2018 -0500 Committer: Thomas Graves Committed: Fri Nov 2 10:56:44 2018 -0500 -- docs/index.md | 5 + docs/quick-start.md | 5 + docs/running-on-kubernetes.md | 5 + docs/running-on-mesos.md | 5 + docs/running-on-yarn.md | 5 + docs/security.md | 17 +++-- docs/spark-standalone.md | 5 + 7 files changed, 45 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ea11d114/docs/index.md -- diff --git a/docs/index.md b/docs/index.md index 40f628b..0300528 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,6 +10,11 @@ It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs. It also supports a rich set of higher-level tools including [Spark SQL](sql-programming-guide.html) for SQL and structured data processing, [MLlib](ml-guide.html) for machine learning, [GraphX](graphx-programming-guide.html) for graph processing, and [Spark Streaming](streaming-programming-guide.html). +# Security + +Security in Spark is OFF by default. This could mean you are vulnerable to attack by default. 
+Please see [Spark Security](security.html) before downloading and running Spark. + # Downloading Get Spark from the [downloads page](https://spark.apache.org/downloads.html) of the project website. This documentation is for Spark version {{site.SPARK_VERSION}}. Spark uses Hadoop's client libraries for HDFS and YARN. Downloads are pre-packaged for a handful of popular Hadoop versions. http://git-wip-us.apache.org/repos/asf/spark/blob/ea11d114/docs/quick-start.md -- diff --git a/docs/quick-start.md b/docs/quick-start.md index ef7af6c..28186c1 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -17,6 +17,11 @@ you can download a package for any version of Hadoop. Note that, before Spark 2.0, the main programming interface of Spark was the Resilient Distributed Dataset (RDD). After Spark 2.0, RDDs are replaced by Dataset, which is strongly-typed like an RDD, but with richer optimizations under the hood. The RDD interface is still supported, and you can get a more detailed reference at the [RDD programming guide](rdd-programming-guide.html). However, we highly recommend you to switch to use Dataset, which has better performance than RDD. See the [SQL programming guide](sql-programming-guide.html) to get more information about Dataset. +# Security + +Security in Spark is OFF by default. This could mean you are vulnerable to attack by default. +Please see [Spark Security](security.html) before running Spark. + # Interactive Analysis with the Spark Shell ## Basics http://git-wip-us.apache.org/repos/asf/spark/blob/ea11d114/docs/running-on-kubernetes.md -- diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index f19aa41..754b1ff 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -12,6 +12,11 @@ Kubernetes scheduler that has been added to Spark. In future versions, there may be behavioral changes around configuration, container images and entrypoints.** +# Security + +Security in Spark is OFF by default. 
This could mean you are vulnerable to attack by default. +Please see [Spark Security](security.html) and the specific security sections in this doc before running Spark. + # Prerequisites * A runnable distribution of Spark 2.3 or above. http://git-wip-us.apache.org/repos/asf/spark/blob/ea11d114/docs/running-on-mesos.md -- diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index b473e65..2502cd4 100644 --- a/docs/running-on-mesos.md +++
spark git commit: [SPARK-25023] Clarify Spark security documentation
Repository: spark Updated Branches: refs/heads/master e91b60771 -> c00186f90 [SPARK-25023] Clarify Spark security documentation ## What changes were proposed in this pull request? Clarify documentation about security. ## How was this patch tested? None, just documentation Closes #22852 from tgravescs/SPARK-25023. Authored-by: Thomas Graves Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c00186f9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c00186f9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c00186f9 Branch: refs/heads/master Commit: c00186f90cfcc33492d760f874ead34f0e3da6ed Parents: e91b607 Author: Thomas Graves Authored: Fri Nov 2 10:56:30 2018 -0500 Committer: Thomas Graves Committed: Fri Nov 2 10:56:30 2018 -0500 -- docs/index.md | 5 + docs/quick-start.md | 5 + docs/running-on-kubernetes.md | 5 + docs/running-on-mesos.md | 5 + docs/running-on-yarn.md | 5 + docs/security.md | 17 +++-- docs/spark-standalone.md | 5 + 7 files changed, 45 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c00186f9/docs/index.md -- diff --git a/docs/index.md b/docs/index.md index d269f54..ac38f1d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,6 +10,11 @@ It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs. It also supports a rich set of higher-level tools including [Spark SQL](sql-programming-guide.html) for SQL and structured data processing, [MLlib](ml-guide.html) for machine learning, [GraphX](graphx-programming-guide.html) for graph processing, and [Spark Streaming](streaming-programming-guide.html). +# Security + +Security in Spark is OFF by default. This could mean you are vulnerable to attack by default. +Please see [Spark Security](security.html) before downloading and running Spark. 
+ # Downloading Get Spark from the [downloads page](https://spark.apache.org/downloads.html) of the project website. This documentation is for Spark version {{site.SPARK_VERSION}}. Spark uses Hadoop's client libraries for HDFS and YARN. Downloads are pre-packaged for a handful of popular Hadoop versions. http://git-wip-us.apache.org/repos/asf/spark/blob/c00186f9/docs/quick-start.md -- diff --git a/docs/quick-start.md b/docs/quick-start.md index ef7af6c..28186c1 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -17,6 +17,11 @@ you can download a package for any version of Hadoop. Note that, before Spark 2.0, the main programming interface of Spark was the Resilient Distributed Dataset (RDD). After Spark 2.0, RDDs are replaced by Dataset, which is strongly-typed like an RDD, but with richer optimizations under the hood. The RDD interface is still supported, and you can get a more detailed reference at the [RDD programming guide](rdd-programming-guide.html). However, we highly recommend you to switch to use Dataset, which has better performance than RDD. See the [SQL programming guide](sql-programming-guide.html) to get more information about Dataset. +# Security + +Security in Spark is OFF by default. This could mean you are vulnerable to attack by default. +Please see [Spark Security](security.html) before running Spark. + # Interactive Analysis with the Spark Shell ## Basics http://git-wip-us.apache.org/repos/asf/spark/blob/c00186f9/docs/running-on-kubernetes.md -- diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 2917197..9052268 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -12,6 +12,11 @@ Kubernetes scheduler that has been added to Spark. In future versions, there may be behavioral changes around configuration, container images and entrypoints.** +# Security + +Security in Spark is OFF by default. This could mean you are vulnerable to attack by default. 
+Please see [Spark Security](security.html) and the specific security sections in this doc before running Spark. + # Prerequisites * A runnable distribution of Spark 2.3 or above. http://git-wip-us.apache.org/repos/asf/spark/blob/c00186f9/docs/running-on-mesos.md -- diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index b473e65..2502cd4 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -13,6 +13,11 @@ The advantages of deploying Spark with Mesos include:
spark git commit: [SPARK-24851][UI] Map a Stage ID to its Associated Job ID
Repository: spark Updated Branches: refs/heads/master e3133f4ab -> deb9588b2 [SPARK-24851][UI] Map a Stage ID to its Associated Job ID It would be nice to have a field in the Stage Page UI which would show the mapping of the current stage ID to the job IDs to which that stage belongs. ## What changes were proposed in this pull request? Added a field in Stage UI to display the corresponding job id for that particular stage. ## How was this patch tested? https://user-images.githubusercontent.com/8190/43220447-a8e94f80-900f-11e8-8a20-a235bbd5a369.png Closes #21809 from pgandhi999/SPARK-24851. Authored-by: pgandhi Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/deb9588b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/deb9588b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/deb9588b Branch: refs/heads/master Commit: deb9588b2ab6596b30ab17f56c59951cabf57162 Parents: e3133f4 Author: pgandhi Authored: Tue Oct 9 08:59:21 2018 -0500 Committer: Thomas Graves Committed: Tue Oct 9 08:59:21 2018 -0500 -- .../scala/org/apache/spark/status/AppStatusStore.scala | 8 +--- .../apache/spark/status/api/v1/StagesResource.scala| 2 +- .../scala/org/apache/spark/ui/jobs/StagePage.scala | 13 +++-- 3 files changed, 17 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/deb9588b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala -- diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index e237281..9839cbb 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -112,10 +112,12 @@ private[spark] class AppStatusStore( } } - def stageAttempt(stageId: Int, stageAttemptId: Int, details: Boolean = false): v1.StageData = { + def stageAttempt(stageId: Int, 
stageAttemptId: Int, + details: Boolean = false): (v1.StageData, Seq[Int]) = { val stageKey = Array(stageId, stageAttemptId) -val stage = store.read(classOf[StageDataWrapper], stageKey).info -if (details) stageWithDetails(stage) else stage +val stageDataWrapper = store.read(classOf[StageDataWrapper], stageKey) +val stage = if (details) stageWithDetails(stageDataWrapper.info) else stageDataWrapper.info +(stage, stageDataWrapper.jobIds.toSeq) } def taskCount(stageId: Int, stageAttemptId: Int): Long = { http://git-wip-us.apache.org/repos/asf/spark/blob/deb9588b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala -- diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala index 96249e4..30d52b9 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala @@ -56,7 +56,7 @@ private[v1] class StagesResource extends BaseAppResource { @PathParam("stageAttemptId") stageAttemptId: Int, @QueryParam("details") @DefaultValue("true") details: Boolean): StageData = withUI { ui => try { - ui.store.stageAttempt(stageId, stageAttemptId, details = details) + ui.store.stageAttempt(stageId, stageAttemptId, details = details)._1 } catch { case _: NoSuchElementException => // Change the message depending on whether there are any attempts for the requested stage. 
http://git-wip-us.apache.org/repos/asf/spark/blob/deb9588b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 7428bbe..0f74b07 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -105,7 +105,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We val stageAttemptId = parameterAttempt.toInt val stageHeader = s"Details for Stage $stageId (Attempt $stageAttemptId)" -val stageData = parent.store +val (stageData, stageJobIds) = parent.store .asOption(parent.store.stageAttempt(stageId, stageAttemptId, details = false)) .getOrElse { val content = @@ -183,6 +183,15 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We
spark git commit: [SPARK-25641] Change the spark.shuffle.server.chunkFetchHandlerThreadsPercent default to 100
Repository: spark Updated Branches: refs/heads/master 1a2862535 -> 6353425af [SPARK-25641] Change the spark.shuffle.server.chunkFetchHandlerThreadsPercent default to 100 ## What changes were proposed in this pull request? We want to change the default percentage to 100 for spark.shuffle.server.chunkFetchHandlerThreadsPercent. The reason is that it is currently set to 0, which means that even if io.serverThreads > 0, the default number of threads would be 2 * #cores instead of io.serverThreads. We want the default to be io.serverThreads when the percentage is not set at all. An explicit value of 0 would still mean 2 * #cores. ## How was this patch tested? Manual (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #22628 from redsanket/SPARK-25641. Lead-authored-by: Sanket Chintapalli Co-authored-by: Sanket Chintapalli Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6353425a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6353425a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6353425a Branch: refs/heads/master Commit: 6353425af76f9cc9de7ee4094f41df7a7390d898 Parents: 1a28625 Author: Sanket Chintapalli Authored: Mon Oct 8 13:19:34 2018 -0500 Committer: Thomas Graves Committed: Mon Oct 8 13:19:34 2018 -0500 -- .../apache/spark/network/util/TransportConf.java| 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6353425a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java -- diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java 
b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index 6d5cccd..43a6bc7 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -296,17 +296,21 @@ public class TransportConf { * and could take long time to process due to disk contentions. By configuring a slightly * higher number of shuffler server threads, we are able to reserve some threads for * handling other RPC messages, thus making the Client less likely to experience timeout - * when sending RPC messages to the shuffle server. Default to 0, which is 2*#cores - * or io.serverThreads. 90 would mean 90% of 2*#cores or 90% of io.serverThreads - * which equals 0.9 * 2*#cores or 0.9 * io.serverThreads. + * when sending RPC messages to the shuffle server. The number of threads used for handling + * chunked fetch requests are percentage of io.serverThreads (if defined) else it is a percentage + * of 2 * #cores. However, a percentage of 0 means netty default number of threads which + * is 2 * #cores ignoring io.serverThreads. The percentage here is configured via + * spark.shuffle.server.chunkFetchHandlerThreadsPercent. The returned value is rounded off to + * ceiling of the nearest integer. */ public int chunkFetchHandlerThreads() { if (!this.getModuleName().equalsIgnoreCase("shuffle")) { return 0; } int chunkFetchHandlerThreadsPercent = - conf.getInt("spark.shuffle.server.chunkFetchHandlerThreadsPercent", 0); -return this.serverThreads() > 0 ? (this.serverThreads() * chunkFetchHandlerThreadsPercent)/100: - (2 * NettyRuntime.availableProcessors() * chunkFetchHandlerThreadsPercent)/100; + conf.getInt("spark.shuffle.server.chunkFetchHandlerThreadsPercent", 100); +return (int)Math.ceil( + (this.serverThreads() > 0 ? 
this.serverThreads() : 2 * NettyRuntime.availableProcessors()) * + chunkFetchHandlerThreadsPercent/(double)100); } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18364][YARN] Expose metrics for YarnShuffleService
Repository: spark Updated Branches: refs/heads/master b96fd44f0 -> a802c69b1 [SPARK-18364][YARN] Expose metrics for YarnShuffleService ## What changes were proposed in this pull request? This PR is a follow-up of the closed https://github.com/apache/spark/pull/17401, which was only closed due to inactivity, but it's still a nice feature to have. The review by jerryshao was taken into consideration and the PR was edited: - VisibleForTesting deleted because of dependency conflicts - removed unnecessary reflection for `MetricsSystemImpl` - added more available types for gauge ## How was this patch tested? Manual deploy of new yarn-shuffle jar into a Node Manager and verifying that the metrics appear in the Node Manager-standard location. This is JMX with a query endpoint running on `hostname:port` Resulting metrics look like this: ``` curl -sk -XGET hostname:port | grep -v '#' | grep 'shuffleService' hadoop_nodemanager_openblockrequestlatencymillis_rate15{name="shuffleService",} 0.31428910657834713 hadoop_nodemanager_blocktransferratebytes_rate15{name="shuffleService",} 566144.9983653595 hadoop_nodemanager_blocktransferratebytes_ratemean{name="shuffleService",} 2464409.9678099006 hadoop_nodemanager_openblockrequestlatencymillis_rate1{name="shuffleService",} 1.2893844732240272 hadoop_nodemanager_registeredexecutorssize{name="shuffleService",} 2.0 hadoop_nodemanager_openblockrequestlatencymillis_ratemean{name="shuffleService",} 1.255574678369966 hadoop_nodemanager_openblockrequestlatencymillis_count{name="shuffleService",} 315.0 hadoop_nodemanager_openblockrequestlatencymillis_rate5{name="shuffleService",} 0.7661929192569739 hadoop_nodemanager_registerexecutorrequestlatencymillis_ratemean{name="shuffleService",} 0.0 hadoop_nodemanager_registerexecutorrequestlatencymillis_count{name="shuffleService",} 0.0 hadoop_nodemanager_registerexecutorrequestlatencymillis_rate1{name="shuffleService",} 0.0 hadoop_nodemanager_registerexecutorrequestlatencymillis_rate5{name="shuffleService",} 0.0 
hadoop_nodemanager_blocktransferratebytes_count{name="shuffleService",} 6.18271213E8 hadoop_nodemanager_registerexecutorrequestlatencymillis_rate15{name="shuffleService",} 0.0 hadoop_nodemanager_blocktransferratebytes_rate5{name="shuffleService",} 1154114.4881816586 hadoop_nodemanager_blocktransferratebytes_rate1{name="shuffleService",} 574745.0749848988 ``` Closes #22485 from mareksimunek/SPARK-18364. Lead-authored-by: marek.simunek Co-authored-by: Andrew Ash Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a802c69b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a802c69b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a802c69b Branch: refs/heads/master Commit: a802c69b130b69a35b372ffe1b01289577f6fafb Parents: b96fd44 Author: marek.simunek Authored: Mon Oct 1 11:04:37 2018 -0500 Committer: Thomas Graves Committed: Mon Oct 1 11:04:37 2018 -0500 -- .../spark/network/yarn/YarnShuffleService.java | 11 ++ .../network/yarn/YarnShuffleServiceMetrics.java | 137 +++ .../yarn/YarnShuffleServiceMetricsSuite.scala | 73 ++ 3 files changed, 221 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a802c69b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java -- diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index d8b2ed6..72ae1a1 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -35,6 +35,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.metrics2.impl.MetricsSystemImpl; +import 
org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.server.api.*; import org.apache.spark.network.util.LevelDBProvider; @@ -168,6 +170,15 @@ public class YarnShuffleService extends AuxiliaryService { TransportConf transportConf = new TransportConf("shuffle", new HadoopConfigProvider(conf)); blockHandler = new ExternalShuffleBlockHandler(transportConf, registeredExecutorFile); + // register metrics on the block handler into the Node Manager's metrics system. + YarnShuffleServiceMetrics serviceMetrics = +new YarnShuffleServiceMetrics(blockHandler.getAllMetrics()); + + MetricsSystemImpl
spark git commit: [SPARK-24355] Spark external shuffle server improvement to better handle block fetch requests.
Repository: spark Updated Branches: refs/heads/master 2c9d8f56c -> ff601cf71 [SPARK-24355] Spark external shuffle server improvement to better handle block fetch requests. ## What changes were proposed in this pull request? Description: Right now, the default server side netty handler threads is 2 * # cores, and can be further configured with parameter spark.shuffle.io.serverThreads. In order to process a client request, it would require one available server netty handler thread. However, when the server netty handler threads start to process ChunkFetchRequests, they will be blocked on disk I/O, mostly due to disk contentions from the random read operations initiated by all the ChunkFetchRequests received from clients. As a result, when the shuffle server is serving many concurrent ChunkFetchRequests, the server side netty handler threads could all be blocked on reading shuffle files, thus leaving no handler thread available to process other types of requests which should all be very quick to process. This issue could potentially be fixed by limiting the number of netty handler threads that could get blocked when processing ChunkFetchRequest. We have a patch to do this by using a separate EventLoopGroup with a dedicated ChannelHandler to process ChunkFetchRequest. This enables shuffle server to reserve netty handler threads for non-ChunkFetchRequest, thus enabling consistent processing time for these requests which are fast to process. After deploying the patch in our infrastructure, we no longer see timeout issues with either executor registration with local shuffle server or shuffle client establishing connection with remote shuffle server. (Please fill in changes proposed in this fix) For Original PR please refer here https://github.com/apache/spark/pull/21402 ## How was this patch tested? Unit tests and stress testing. (Please explain how this patch was tested. E.g. 
unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #22173 from redsanket/SPARK-24335. Authored-by: Sanket Chintapalli Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff601cf7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff601cf7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff601cf7 Branch: refs/heads/master Commit: ff601cf71d226082e156c4ff9a8f5593aa7a2085 Parents: 2c9d8f5 Author: Sanket Chintapalli Authored: Fri Sep 21 09:05:56 2018 -0500 Committer: Thomas Graves Committed: Fri Sep 21 09:05:56 2018 -0500 -- .../apache/spark/network/TransportContext.java | 66 - .../server/ChunkFetchRequestHandler.java| 135 +++ .../network/server/TransportChannelHandler.java | 21 ++- .../network/server/TransportRequestHandler.java | 35 + .../spark/network/util/TransportConf.java | 28 .../network/ChunkFetchRequestHandlerSuite.java | 102 ++ .../spark/network/ExtendedChannelPromise.java | 69 ++ .../network/TransportRequestHandlerSuite.java | 55 +--- .../network/shuffle/ExternalShuffleClient.java | 2 +- 9 files changed, 425 insertions(+), 88 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ff601cf7/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java -- diff --git a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java index ae91bc9..480b526 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java +++ b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java @@ -21,6 +21,8 @@ import java.util.ArrayList; import java.util.List; import io.netty.channel.Channel; +import 
io.netty.channel.ChannelPipeline; +import io.netty.channel.EventLoopGroup; import io.netty.channel.socket.SocketChannel; import io.netty.handler.timeout.IdleStateHandler; import org.slf4j.Logger; @@ -32,11 +34,13 @@ import org.apache.spark.network.client.TransportClientFactory; import org.apache.spark.network.client.TransportResponseHandler; import org.apache.spark.network.protocol.MessageDecoder; import org.apache.spark.network.protocol.MessageEncoder; +import org.apache.spark.network.server.ChunkFetchRequestHandler; import org.apache.spark.network.server.RpcHandler; import org.apache.spark.network.server.TransportChannelHandler; import org.apache.spark.network.server.TransportRequestHandler; import
spark git commit: [SPARK-24415][CORE] Fixed the aggregated stage metrics by retaining stage objects in liveStages until all tasks are complete
Repository: spark Updated Branches: refs/heads/branch-2.3 84922e506 -> 5b8b6b4e9 [SPARK-24415][CORE] Fixed the aggregated stage metrics by retaining stage objects in liveStages until all tasks are complete The problem occurs because stage object is removed from liveStages in AppStatusListener onStageCompletion. Because of this any onTaskEnd event received after onStageCompletion event do not update stage metrics. The fix is to retain stage objects in liveStages until all tasks are complete. 1. Fixed the reproducible example posted in the JIRA 2. Added unit test Closes #22209 from ankuriitg/ankurgupta/SPARK-24415. Authored-by: ankurgupta Signed-off-by: Marcelo Vanzin (cherry picked from commit 39a02d8f75def7191c66d388729ba1721c92188d) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b8b6b4e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b8b6b4e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b8b6b4e Branch: refs/heads/branch-2.3 Commit: 5b8b6b4e9e36228e993a15cab19c80e7fad43786 Parents: 84922e5 Author: ankurgupta Authored: Wed Sep 5 09:41:05 2018 -0700 Committer: Thomas Graves Committed: Fri Sep 7 08:48:39 2018 -0500 -- .../apache/spark/status/AppStatusListener.scala | 61 +++- .../spark/status/AppStatusListenerSuite.scala | 55 ++ .../spark/streaming/UISeleniumSuite.scala | 9 ++- 3 files changed, 108 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5b8b6b4e/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala -- diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 496165c..9375ae9 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -310,11 +310,20 @@ private[spark] class AppStatusListener( val e 
= it.next() if (job.stageIds.contains(e.getKey()._1)) { val stage = e.getValue() - stage.status = v1.StageStatus.SKIPPED - job.skippedStages += stage.info.stageId - job.skippedTasks += stage.info.numTasks - it.remove() - update(stage, now) + if (v1.StageStatus.PENDING.equals(stage.status)) { +stage.status = v1.StageStatus.SKIPPED +job.skippedStages += stage.info.stageId +job.skippedTasks += stage.info.numTasks +job.activeStages -= 1 + +pools.get(stage.schedulingPool).foreach { pool => + pool.stageIds = pool.stageIds - stage.info.stageId + update(pool, now) +} + +it.remove() +update(stage, now, last = true) + } } } @@ -466,7 +475,16 @@ private[spark] class AppStatusListener( if (killedDelta > 0) { stage.killedSummary = killedTasksSummary(event.reason, stage.killedSummary) } - maybeUpdate(stage, now) + // [SPARK-24415] Wait for all tasks to finish before removing stage from live list + val removeStage = +stage.activeTasks == 0 && + (v1.StageStatus.COMPLETE.equals(stage.status) || +v1.StageStatus.FAILED.equals(stage.status)) + if (removeStage) { +update(stage, now, last = true) + } else { +maybeUpdate(stage, now) + } // Store both stage ID and task index in a single long variable for tracking at job level. 
val taskIndex = (event.stageId.toLong << Integer.SIZE) | event.taskInfo.index @@ -481,7 +499,7 @@ private[spark] class AppStatusListener( if (killedDelta > 0) { job.killedSummary = killedTasksSummary(event.reason, job.killedSummary) } -maybeUpdate(job, now) +conditionalLiveUpdate(job, now, removeStage) } val esummary = stage.executorSummary(event.taskInfo.executorId) @@ -492,7 +510,7 @@ private[spark] class AppStatusListener( if (metricsDelta != null) { esummary.metrics = LiveEntityHelpers.addMetrics(esummary.metrics, metricsDelta) } - maybeUpdate(esummary, now) + conditionalLiveUpdate(esummary, now, removeStage) if (!stage.cleaning && stage.savedTasks.get() > maxTasksPerStage) { stage.cleaning = true @@ -500,6 +518,9 @@ private[spark] class AppStatusListener( cleanupTasks(stage) } } + if (removeStage) { +liveStages.remove((event.stageId, event.stageAttemptId)) + } } liveExecutors.get(event.taskInfo.executorId).foreach { exec => @@ -524,17 +545,13 @@ private[spark]
spark git commit: [SPARK-25231] Fix synchronization of executor heartbeat receiver in TaskSchedulerImpl
Repository: spark Updated Branches: refs/heads/master 925449283 -> 559b899ac [SPARK-25231] Fix synchronization of executor heartbeat receiver in TaskSchedulerImpl Running a large Spark job with speculation turned on was causing executor heartbeats to time out on the driver end after sometime and eventually, after hitting the max number of executor failures, the job would fail. ## What changes were proposed in this pull request? The main reason for the heartbeat timeouts was that the heartbeat-receiver-event-loop-thread was blocked waiting on the TaskSchedulerImpl object which was being held by one of the dispatcher-event-loop threads executing the method dequeueSpeculativeTasks() in TaskSetManager.scala. On further analysis of the heartbeat receiver method executorHeartbeatReceived() in TaskSchedulerImpl class, we found out that instead of waiting to acquire the lock on the TaskSchedulerImpl object, we can remove that lock and make the operations to the global variables inside the code block to be atomic. The block of code in that method only uses one global HashMap taskIdToTaskSetManager. Making that map a ConcurrentHashMap, we are ensuring atomicity of operations and speeding up the heartbeat receiver thread operation. ## How was this patch tested? Screenshots of the thread dump have been attached below: **heartbeat-receiver-event-loop-thread:** https://user-images.githubusercontent.com/8190/44593413-e25df780-a788-11e8-9520-176a18401a59.png;> **dispatcher-event-loop-thread:** https://user-images.githubusercontent.com/8190/44593484-13d6c300-a789-11e8-8d88-34b1d51d4541.png;> Closes #1 from pgandhi999/SPARK-25231. 
Authored-by: pgandhi Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/559b899a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/559b899a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/559b899a Branch: refs/heads/master Commit: 559b899aceb160fcec3a57109c0b60a0ae40daeb Parents: 9254492 Author: pgandhi Authored: Wed Sep 5 16:10:49 2018 -0500 Committer: Thomas Graves Committed: Wed Sep 5 16:10:49 2018 -0500 -- .../org/apache/spark/scheduler/TaskSchedulerImpl.scala | 12 ++-- .../cluster/CoarseGrainedSchedulerBackend.scala | 2 +- .../spark/scheduler/SchedulerIntegrationSuite.scala | 3 ++- .../apache/spark/scheduler/TaskSchedulerImplSuite.scala | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/559b899a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 8992d7e..8b71170 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.{Locale, Timer, TimerTask} -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import java.util.concurrent.atomic.AtomicLong import scala.collection.Set @@ -91,7 +91,7 @@ private[spark] class TaskSchedulerImpl( private val taskSetsByStageIdAndAttempt = new HashMap[Int, HashMap[Int, TaskSetManager]] // Protected by `this` - private[scheduler] val taskIdToTaskSetManager = new HashMap[Long, TaskSetManager] + private[scheduler] val taskIdToTaskSetManager = new ConcurrentHashMap[Long, TaskSetManager] val taskIdToExecutorId = new HashMap[Long, String] 
@volatile private var hasReceivedTask = false @@ -315,7 +315,7 @@ private[spark] class TaskSchedulerImpl( for (task <- taskSet.resourceOffer(execId, host, maxLocality)) { tasks(i) += task val tid = task.taskId -taskIdToTaskSetManager(tid) = taskSet +taskIdToTaskSetManager.put(tid, taskSet) taskIdToExecutorId(tid) = execId executorIdToRunningTaskIds(execId).add(tid) availableCpus(i) -= CPUS_PER_TASK @@ -465,7 +465,7 @@ private[spark] class TaskSchedulerImpl( var reason: Option[ExecutorLossReason] = None synchronized { try { -taskIdToTaskSetManager.get(tid) match { +Option(taskIdToTaskSetManager.get(tid)) match { case Some(taskSet) => if (state == TaskState.LOST) { // TaskState.LOST is only used by the deprecated Mesos fine-grained scheduling mode, @@ -517,10 +517,10 @@ private[spark] class TaskSchedulerImpl( accumUpdates: Array[(Long, Seq[AccumulatorV2[_,
spark git commit: [SPARK-25231] Fix synchronization of executor heartbeat receiver in TaskSchedulerImpl
Repository: spark Updated Branches: refs/heads/branch-2.3 dbf0b9340 -> 31e46ec60 [SPARK-25231] Fix synchronization of executor heartbeat receiver in TaskSchedulerImpl Running a large Spark job with speculation turned on was causing executor heartbeats to time out on the driver end after sometime and eventually, after hitting the max number of executor failures, the job would fail. ## What changes were proposed in this pull request? The main reason for the heartbeat timeouts was that the heartbeat-receiver-event-loop-thread was blocked waiting on the TaskSchedulerImpl object which was being held by one of the dispatcher-event-loop threads executing the method dequeueSpeculativeTasks() in TaskSetManager.scala. On further analysis of the heartbeat receiver method executorHeartbeatReceived() in TaskSchedulerImpl class, we found out that instead of waiting to acquire the lock on the TaskSchedulerImpl object, we can remove that lock and make the operations to the global variables inside the code block to be atomic. The block of code in that method only uses one global HashMap taskIdToTaskSetManager. Making that map a ConcurrentHashMap, we are ensuring atomicity of operations and speeding up the heartbeat receiver thread operation. ## How was this patch tested? Screenshots of the thread dump have been attached below: **heartbeat-receiver-event-loop-thread:** https://user-images.githubusercontent.com/8190/44593413-e25df780-a788-11e8-9520-176a18401a59.png;> **dispatcher-event-loop-thread:** https://user-images.githubusercontent.com/8190/44593484-13d6c300-a789-11e8-8d88-34b1d51d4541.png;> Closes #1 from pgandhi999/SPARK-25231. 
Authored-by: pgandhi Signed-off-by: Thomas Graves (cherry picked from commit 559b899aceb160fcec3a57109c0b60a0ae40daeb) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31e46ec6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31e46ec6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31e46ec6 Branch: refs/heads/branch-2.3 Commit: 31e46ec60849d315a4e83e0a332606a4405907ad Parents: dbf0b93 Author: pgandhi Authored: Wed Sep 5 16:10:49 2018 -0500 Committer: Thomas Graves Committed: Wed Sep 5 16:11:08 2018 -0500 -- .../org/apache/spark/scheduler/TaskSchedulerImpl.scala | 12 ++-- .../cluster/CoarseGrainedSchedulerBackend.scala | 2 +- .../spark/scheduler/SchedulerIntegrationSuite.scala | 3 ++- .../apache/spark/scheduler/TaskSchedulerImplSuite.scala | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/31e46ec6/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 56c0bf6..4edc6b2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.{Locale, Timer, TimerTask} -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import java.util.concurrent.atomic.AtomicLong import scala.collection.Set @@ -90,7 +90,7 @@ private[spark] class TaskSchedulerImpl( private val taskSetsByStageIdAndAttempt = new HashMap[Int, HashMap[Int, TaskSetManager]] // Protected by `this` - private[scheduler] val taskIdToTaskSetManager = new HashMap[Long, TaskSetManager] + private[scheduler] val taskIdToTaskSetManager 
= new ConcurrentHashMap[Long, TaskSetManager] val taskIdToExecutorId = new HashMap[Long, String] @volatile private var hasReceivedTask = false @@ -286,7 +286,7 @@ private[spark] class TaskSchedulerImpl( for (task <- taskSet.resourceOffer(execId, host, maxLocality)) { tasks(i) += task val tid = task.taskId -taskIdToTaskSetManager(tid) = taskSet +taskIdToTaskSetManager.put(tid, taskSet) taskIdToExecutorId(tid) = execId executorIdToRunningTaskIds(execId).add(tid) availableCpus(i) -= CPUS_PER_TASK @@ -392,7 +392,7 @@ private[spark] class TaskSchedulerImpl( var reason: Option[ExecutorLossReason] = None synchronized { try { -taskIdToTaskSetManager.get(tid) match { +Option(taskIdToTaskSetManager.get(tid)) match { case Some(taskSet) => if (state == TaskState.LOST) { // TaskState.LOST is only used by the deprecated Mesos fine-grained scheduling mode, @@ -444,10
spark git commit: [SPARK-24909][CORE] Always unregister pending partition on task completion.
Repository: spark Updated Branches: refs/heads/branch-2.3 b072717b3 -> dbf0b9340 [SPARK-24909][CORE] Always unregister pending partition on task completion. Spark scheduler can hang when fetch failures, executor lost, task running on lost executor, and multiple stage attempts. To fix this we change to always unregister the pending partition on task completion. this PR is actually reverting the change in SPARK-19263, so that it always does shuffleStage.pendingPartitions -= task.partitionId. The change in SPARK-23433, should fix the issue originally from SPARK-19263. Unit tests. The condition happens on a race which I haven't reproduced on a real customer, just see it sometimes on customers jobs in a real cluster. I am also working on adding spark scheduler integration tests. Closes #21976 from tgravescs/SPARK-24909. Authored-by: Thomas Graves Signed-off-by: Marcelo Vanzin (cherry picked from commit ec3e9986385880adce1648eae30007eccff862ba) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dbf0b934 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dbf0b934 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dbf0b934 Branch: refs/heads/branch-2.3 Commit: dbf0b934024dfa562775967e3e3114de5a163443 Parents: b072717 Author: Thomas Graves Authored: Wed Aug 29 16:32:02 2018 -0700 Committer: Thomas Graves Committed: Thu Aug 30 09:10:00 2018 -0500 -- .../apache/spark/scheduler/DAGScheduler.scala | 17 +- .../spark/scheduler/DAGSchedulerSuite.scala | 24 2 files changed, 16 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dbf0b934/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 7029e22..0df38f1 100644 --- 
a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1252,18 +1252,10 @@ class DAGScheduler( case smt: ShuffleMapTask => val shuffleStage = stage.asInstanceOf[ShuffleMapStage] +shuffleStage.pendingPartitions -= task.partitionId val status = event.result.asInstanceOf[MapStatus] val execId = status.location.executorId logDebug("ShuffleMapTask finished on " + execId) -if (stageIdToStage(task.stageId).latestInfo.attemptNumber == task.stageAttemptId) { - // This task was for the currently running attempt of the stage. Since the task - // completed successfully from the perspective of the TaskSetManager, mark it as - // no longer pending (the TaskSetManager may consider the task complete even - // when the output needs to be ignored because the task's epoch is too small below. - // In this case, when pending partitions is empty, there will still be missing - // output locations, which will cause the DAGScheduler to resubmit the stage below.) - shuffleStage.pendingPartitions -= task.partitionId -} if (failedEpoch.contains(execId) && smt.epoch <= failedEpoch(execId)) { logInfo(s"Ignoring possibly bogus $smt completion from executor $execId") } else { @@ -1272,13 +1264,6 @@ class DAGScheduler( // available. mapOutputTracker.registerMapOutput( shuffleStage.shuffleDep.shuffleId, smt.partitionId, status) - // Remove the task's partition from pending partitions. This may have already been - // done above, but will not have been done yet in cases where the task attempt was - // from an earlier attempt of the stage (i.e., not the attempt that's currently - // running). This allows the DAGScheduler to mark the stage as complete when one - // copy of each task has finished successfully, even if the currently active stage - // still has tasks running. 
- shuffleStage.pendingPartitions -= task.partitionId } if (runningStages.contains(shuffleStage) && shuffleStage.pendingPartitions.isEmpty) { http://git-wip-us.apache.org/repos/asf/spark/blob/dbf0b934/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index
spark git commit: [SPARK-25043] print master and appId from spark-sql on startup
Repository: spark Updated Branches: refs/heads/master 102487584 -> b81e3031f [SPARK-25043] print master and appId from spark-sql on startup ## What changes were proposed in this pull request? A small change to print the master and appId from spark-sql as with logging turned down all the way (`log4j.logger.org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver=WARN`), we may not know this information easily. This adds the following string before the `spark-sql>` prompt shows on the screen. `Spark master: yarn, Application Id: application_123456789_12345` ## How was this patch tested? I ran spark-sql locally and saw the appId displayed as expected. Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #22025 from abellina/SPARK-25043_print_master_and_app_id_from_sparksql. Lead-authored-by: Alessandro Bellina Co-authored-by: Alessandro Bellina Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b81e3031 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b81e3031 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b81e3031 Branch: refs/heads/master Commit: b81e3031fd247dfb4b3e02e0a986fb4b19d00f7c Parents: 1024875 Author: Alessandro Bellina Authored: Tue Aug 14 13:15:55 2018 -0500 Committer: Thomas Graves Committed: Tue Aug 14 13:15:55 2018 -0500 -- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 8 1 file changed, 8 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b81e3031/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index d9fd3eb..bb96cea 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -258,6 +258,8 @@ private[hive] object SparkSQLCLIDriver extends Logging { def continuedPromptWithDBSpaces: String = continuedPrompt + ReflectionUtils.invokeStatic( classOf[CliDriver], "spacesForString", classOf[String] -> currentDB) +cli.printMasterAndAppId + var currentPrompt = promptWithCurrentDB var line = reader.readLine(currentPrompt + "> ") @@ -323,6 +325,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { hiveVariables.asScala.foreach(kv => SparkSQLEnv.sqlContext.conf.setConfString(kv._1, kv._2)) } + def printMasterAndAppId(): Unit = { +val master = SparkSQLEnv.sparkContext.master +val appId = SparkSQLEnv.sparkContext.applicationId +console.printInfo(s"Spark master: $master, Application Id: $appId") + } + override def processCmd(cmd: String): Int = { val cmd_trimmed: String = cmd.trim() val cmd_lower = cmd_trimmed.toLowerCase(Locale.ROOT) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-24992][CORE] spark should randomize yarn local dir selection
Repository: spark Updated Branches: refs/heads/master 1a5e46076 -> 51e2b38d9 [SPARK-24992][CORE] spark should randomize yarn local dir selection **Description: [SPARK-24992](https://issues.apache.org/jira/browse/SPARK-24992)** Utils.getLocalDir is used to get the path of a temporary directory. However, it always returns the same directory, which is the first element in the array localRootDirs. When running on YARN, this might cause us to always write to one disk, which makes it busy while other disks are free. We should randomize the selection to spread out the load. **What changes were proposed in this pull request?** This PR randomizes the selection of the local directory inside the method Utils.getLocalDir. This change affects the Utils.fetchFile method, since it relies on the fact that Utils.getLocalDir always returns the same directory to cache files. Therefore, a new variable cachedLocalDir is used to cache the first local directory that it gets from Utils.getLocalDir. Also, when getting the configured local directories (inside Utils.getConfiguredLocalDirs), in case we are in yarn mode, the array of directories is also randomized before being returned. Author: Hieu Huynh <hieu.hu...@oath.com> Closes #21953 from hthuynh2/SPARK_24992. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51e2b38d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51e2b38d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51e2b38d Branch: refs/heads/master Commit: 51e2b38d93df8cb0cc151d5e68a2190eab52644c Parents: 1a5e460 Author: Hieu Huynh <âhieu.hu...@oath.comâ> Authored: Mon Aug 6 13:58:28 2018 -0500 Committer: Thomas Graves Committed: Mon Aug 6 13:58:28 2018 -0500 -- .../scala/org/apache/spark/util/Utils.scala | 21 1 file changed, 17 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/51e2b38d/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index a6fd363..7ec707d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -83,6 +83,7 @@ private[spark] object Utils extends Logging { val random = new Random() private val sparkUncaughtExceptionHandler = new SparkUncaughtExceptionHandler + @volatile private var cachedLocalDir: String = "" /** * Define a default value for driver memory here since this value is referenced across the code @@ -462,7 +463,15 @@ private[spark] object Utils extends Logging { if (useCache && fetchCacheEnabled) { val cachedFileName = s"${url.hashCode}${timestamp}_cache" val lockFileName = s"${url.hashCode}${timestamp}_lock" - val localDir = new File(getLocalDir(conf)) + // Set the cachedLocalDir for the first time and re-use it later + if (cachedLocalDir.isEmpty) { +this.synchronized { + if (cachedLocalDir.isEmpty) { +cachedLocalDir = getLocalDir(conf) + } +} + } + val localDir = new File(cachedLocalDir) val lockFile = new File(localDir, lockFileName) val lockFileChannel = new RandomAccessFile(lockFile, "rw").getChannel() // Only one executor entry. 
@@ -767,13 +776,17 @@ private[spark] object Utils extends Logging { * - Otherwise, this will return java.io.tmpdir. * * Some of these configuration options might be lists of multiple paths, but this method will - * always return a single directory. + * always return a single directory. The return directory is chosen randomly from the array + * of directories it gets from getOrCreateLocalRootDirs. */ def getLocalDir(conf: SparkConf): String = { -getOrCreateLocalRootDirs(conf).headOption.getOrElse { +val localRootDirs = getOrCreateLocalRootDirs(conf) +if (localRootDirs.isEmpty) { val configuredLocalDirs = getConfiguredLocalDirs(conf) throw new IOException( s"Failed to get a temp directory under [${configuredLocalDirs.mkString(",")}].") +} else { + localRootDirs(scala.util.Random.nextInt(localRootDirs.length)) } } @@ -815,7 +828,7 @@ private[spark] object Utils extends Logging { // to what Yarn on this system said was available. Note this assumes that Yarn has // created the directories already, and that they are secured so that only the // user has access to them. - getYarnLocalDirs(conf).split(",") + randomizeInPlace(getYarnLocalDirs(conf).split(",")) } else if (conf.getenv("SPARK_EXECUTOR_DIRS") != null) {
spark git commit: [SPARK-24981][CORE] ShutdownHook timeout causes job to fail when succeeded when SparkContext stop() not called by user program
Repository: spark Updated Branches: refs/heads/master c1760da5d -> 35700bb7f [SPARK-24981][CORE] ShutdownHook timeout causes job to fail when succeeded when SparkContext stop() not called by user program **Description** The issue is described in [SPARK-24981](https://issues.apache.org/jira/browse/SPARK-24981). **How does this PR fix the issue?** This PR catches the exception that is thrown while SparkContext.stop() is running (when it is called by the ShutdownHookManager). **How was this patch tested?** I manually tested it by adding a delay (60s) inside stop(). This makes the ShutdownHookManager interrupt the thread that is running stop(). The InterruptedException was caught and the job succeeded. Author: Hieu Huynh <“hieu.hu...@oath.com”> Author: Hieu Tri Huynh Closes #21936 from hthuynh2/SPARK_24981. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35700bb7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35700bb7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35700bb7 Branch: refs/heads/master Commit: 35700bb7f2e3008ff781a1b3a1da8147d26371be Parents: c1760da Author: Hieu Huynh <“hieu.hu...@oath.com”> Authored: Mon Aug 6 09:01:51 2018 -0500 Committer: Thomas Graves Committed: Mon Aug 6 09:01:51 2018 -0500 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/35700bb7/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 03e91cd..e8bacee 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -571,7 +571,12 @@ class SparkContext(config: SparkConf) extends Logging { _shutdownHookRef = ShutdownHookManager.addShutdownHook(
ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY) { () => logInfo("Invoking stop() from shutdown hook") - stop() + try { +stop() + } catch { +case e: Throwable => + logWarning("Ignoring Exception while stopping SparkContext from shutdown hook", e) + } } } catch { case NonFatal(e) => - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-13343] speculative tasks that didn't commit shouldn't be marked as success
Repository: spark Updated Branches: refs/heads/master ee5a5a092 -> 5828f41a5 [SPARK-13343] speculative tasks that didn't commit shouldn't be marked as success **Description** Currently speculative tasks that didn't commit can show up as success (depending on timing of commit). This is a bit confusing because that task didn't really succeed in the sense it didn't write anything. I think these tasks should be marked as KILLED or something that is more obvious to the user exactly what happened. If it happened to hit the timing where it got a commit denied exception, then it shows up as failed and counts against your task failures. It shouldn't count against task failures since that failure really doesn't matter. MapReduce handles these situations so perhaps we can look there for a model. https://user-images.githubusercontent.com/15680678/42013170-99db48c2-7a61-11e8-8c7b-ef94c84e36ea.png **How can this issue happen?** When both attempts of a task finish before the driver sends the command to kill one of them, both of them send the status update FINISHED to the driver. The driver calls TaskSchedulerImpl to handle one successful task at a time. When it handles the first successful task, it sends the command to kill the other copy of the task; however, because that task is already finished, the executor will ignore the command. After finishing handling the first attempt, it processes the second one. Although all actions on the result of this task are skipped, this copy of the task is still marked as SUCCESS. As a result, even though this issue does not affect the result of the job, it might cause confusion for the user because both of them appear to be successful. **How does this PR fix the issue?** The simple way to fix this issue is that when the TaskSetManager handles a successful task, it checks if any other attempt succeeded. If this is the case, it will call handleFailedTask with state==KILLED and reason==TaskKilled("another attempt succeeded") to handle this task as being killed.
**How was this patch tested?** I tested this manually by running the applications that caused the issue before a few times, and observed that the issue does not happen again. Also, I added a unit test in TaskSetManagerSuite to test that if we call handleSuccessfulTask to handle the status update for 2 copies of a task, only the one that is handled first will be marked as SUCCESS. Author: Hieu Huynh <“hieu.hu...@oath.com”> Author: hthuynh2 Closes #21653 from hthuynh2/SPARK_13343. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5828f41a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5828f41a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5828f41a Branch: refs/heads/master Commit: 5828f41a52c446b774a909e96eff8d8c5831b394 Parents: ee5a5a0 Author: Hieu Huynh <“hieu.hu...@oath.com”> Authored: Fri Jul 27 12:34:14 2018 -0500 Committer: Thomas Graves Committed: Fri Jul 27 12:34:14 2018 -0500 -- .../apache/spark/scheduler/TaskSetManager.scala | 19 +- .../spark/scheduler/TaskSetManagerSuite.scala | 70 2 files changed, 88 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5828f41a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 0b21256..8b77641 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -29,7 +29,7 @@ import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.internal.{config, Logging} import org.apache.spark.scheduler.SchedulingMode._ -import org.apache.spark.util.{AccumulatorV2, Clock, SystemClock, Utils} +import org.apache.spark.util.{AccumulatorV2, Clock, LongAccumulator, SystemClock, Utils} import
org.apache.spark.util.collection.MedianHeap /** @@ -728,6 +728,23 @@ private[spark] class TaskSetManager( def handleSuccessfulTask(tid: Long, result: DirectTaskResult[_]): Unit = { val info = taskInfos(tid) val index = info.index +// Check if any other attempt succeeded before this and this attempt has not been handled +if (successful(index) && killedByOtherAttempt.contains(tid)) { + // Undo the effect on calculatedTasks and totalResultSize made earlier when + // checking if can fetch more results + calculatedTasks -= 1 + val resultSizeAcc = result.accumUpdates.find(a => +a.name == Some(InternalAccumulator.RESULT_SIZE)) + if (resultSizeAcc.isDefined) { +totalResultSize
spark git commit: [SPARK-22151] PYTHONPATH not picked up from the spark.yarn.appMaste…
Repository: spark Updated Branches: refs/heads/master c8bee932c -> 1272b2034 [SPARK-22151] PYTHONPATH not picked up from the spark.yarn.appMasterEnv properly Running in yarn cluster mode and trying to set pythonpath via spark.yarn.appMasterEnv.PYTHONPATH doesn't work. The yarn Client code looks at the env variables: val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath) But when you set spark.yarn.appMasterEnv it puts it into the local env. So the python path set in spark.yarn.appMasterEnv isn't properly set. You can work around this if you are running in cluster mode by setting it on the client like: PYTHONPATH=./addon/python/ spark-submit ## What changes were proposed in this pull request? In Client.scala, PYTHONPATH was being overridden, so changed code to append values to PYTHONPATH instead of overriding them. ## How was this patch tested? Added log statements to ApplicationMaster.scala to check for environment variable PYTHONPATH, ran a spark job in cluster mode before the change and verified the issue. Performed the same test after the change and verified the fix. Author: pgandhi Closes #21468 from pgandhi999/SPARK-22151.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1272b203 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1272b203 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1272b203 Branch: refs/heads/master Commit: 1272b2034d4eed4bfe60a49e1065871b3a3f96e0 Parents: c8bee93 Author: pgandhi Authored: Wed Jul 18 14:07:03 2018 -0500 Committer: Thomas Graves Committed: Wed Jul 18 14:07:03 2018 -0500 -- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1272b203/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 793d012..ed9879c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -811,10 +811,12 @@ private[spark] class Client( // Finally, update the Spark config to propagate PYTHONPATH to the AM and executors. if (pythonPath.nonEmpty) { - val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath) + val pythonPathList = (sys.env.get("PYTHONPATH") ++ pythonPath) + env("PYTHONPATH") = (env.get("PYTHONPATH") ++ pythonPathList) .mkString(ApplicationConstants.CLASS_PATH_SEPARATOR) - env("PYTHONPATH") = pythonPathStr - sparkConf.setExecutorEnv("PYTHONPATH", pythonPathStr) + val pythonPathExecutorEnv = (sparkConf.getExecutorEnv.toMap.get("PYTHONPATH") ++ +pythonPathList).mkString(ApplicationConstants.CLASS_PATH_SEPARATOR) + sparkConf.setExecutorEnv("PYTHONPATH", pythonPathExecutorEnv) } if (isClusterMode) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-24677][CORE] Avoid NoSuchElementException from MedianHeap
Repository: spark Updated Branches: refs/heads/branch-2.2 17db57213 -> 144426cff [SPARK-24677][CORE] Avoid NoSuchElementException from MedianHeap ## What changes were proposed in this pull request? When speculation is enabled, TaskSetManager#markPartitionCompleted should write successful task duration to MedianHeap, not just increase tasksSuccessful. Otherwise when TaskSetManager#checkSpeculatableTasks,tasksSuccessful non-zero, but MedianHeap is empty. Then throw an exception successfulTaskDurations.median java.util.NoSuchElementException: MedianHeap is empty. Finally led to stopping SparkContext. ## How was this patch tested? TaskSetManagerSuite.scala unit test:[SPARK-24677] MedianHeap should not be empty when speculation is enabled Author: sychen Closes #21656 from cxzl25/fix_MedianHeap_empty. (cherry picked from commit c8bee932cb644627c4049b5a07dd8028968572d9) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/144426cf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/144426cf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/144426cf Branch: refs/heads/branch-2.2 Commit: 144426cffd6e4b26b676004f5489e218140f7df2 Parents: 17db572 Author: sychen Authored: Wed Jul 18 13:24:41 2018 -0500 Committer: Thomas Graves Committed: Wed Jul 18 13:26:24 2018 -0500 -- .../spark/scheduler/TaskSchedulerImpl.scala | 7 ++- .../apache/spark/scheduler/TaskSetManager.scala | 7 ++- .../spark/scheduler/TaskSetManagerSuite.scala | 49 3 files changed, 59 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/144426cf/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index df6407b..f8c62b4 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -701,9 +701,12 @@ private[spark] class TaskSchedulerImpl private[scheduler]( * do not also submit those same tasks. That also means that a task completion from an earlier * attempt can lead to the entire stage getting marked as successful. */ - private[scheduler] def markPartitionCompletedInAllTaskSets(stageId: Int, partitionId: Int) = { + private[scheduler] def markPartitionCompletedInAllTaskSets( + stageId: Int, + partitionId: Int, + taskInfo: TaskInfo) = { taskSetsByStageIdAndAttempt.getOrElse(stageId, Map()).values.foreach { tsm => - tsm.markPartitionCompleted(partitionId) + tsm.markPartitionCompleted(partitionId, taskInfo) } } http://git-wip-us.apache.org/repos/asf/spark/blob/144426cf/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index d9515fb..705b896 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -748,7 +748,7 @@ private[spark] class TaskSetManager( } // There may be multiple tasksets for this stage -- we let all of them know that the partition // was completed. This may result in some of the tasksets getting completed. -sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId) +sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId, info) // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not // "deserialize" the value when holding a lock to avoid blocking other threads. 
So we call @@ -759,9 +759,12 @@ private[spark] class TaskSetManager( maybeFinishTaskSet() } - private[scheduler] def markPartitionCompleted(partitionId: Int): Unit = { + private[scheduler] def markPartitionCompleted(partitionId: Int, taskInfo: TaskInfo): Unit = { partitionToIndex.get(partitionId).foreach { index => if (!successful(index)) { +if (speculationEnabled && !isZombie) { + successfulTaskDurations.insert(taskInfo.duration) +} tasksSuccessful += 1 successful(index) = true if (tasksSuccessful == numTasks) { http://git-wip-us.apache.org/repos/asf/spark/blob/144426cf/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
spark git commit: [SPARK-24677][CORE] Avoid NoSuchElementException from MedianHeap
Repository: spark Updated Branches: refs/heads/branch-2.3 e31b4766b -> 7be70e29d [SPARK-24677][CORE] Avoid NoSuchElementException from MedianHeap ## What changes were proposed in this pull request? When speculation is enabled, TaskSetManager#markPartitionCompleted should write successful task duration to MedianHeap, not just increase tasksSuccessful. Otherwise when TaskSetManager#checkSpeculatableTasks,tasksSuccessful non-zero, but MedianHeap is empty. Then throw an exception successfulTaskDurations.median java.util.NoSuchElementException: MedianHeap is empty. Finally led to stopping SparkContext. ## How was this patch tested? TaskSetManagerSuite.scala unit test:[SPARK-24677] MedianHeap should not be empty when speculation is enabled Author: sychen Closes #21656 from cxzl25/fix_MedianHeap_empty. (cherry picked from commit c8bee932cb644627c4049b5a07dd8028968572d9) Signed-off-by: Thomas Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7be70e29 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7be70e29 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7be70e29 Branch: refs/heads/branch-2.3 Commit: 7be70e29dd92de36dbb30ce39623d588f48e4cac Parents: e31b476 Author: sychen Authored: Wed Jul 18 13:24:41 2018 -0500 Committer: Thomas Graves Committed: Wed Jul 18 13:24:54 2018 -0500 -- .../spark/scheduler/TaskSchedulerImpl.scala | 7 ++- .../apache/spark/scheduler/TaskSetManager.scala | 7 ++- .../spark/scheduler/TaskSetManagerSuite.scala | 49 3 files changed, 59 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7be70e29/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 598b62f..56c0bf6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -697,9 +697,12 @@ private[spark] class TaskSchedulerImpl( * do not also submit those same tasks. That also means that a task completion from an earlier * attempt can lead to the entire stage getting marked as successful. */ - private[scheduler] def markPartitionCompletedInAllTaskSets(stageId: Int, partitionId: Int) = { + private[scheduler] def markPartitionCompletedInAllTaskSets( + stageId: Int, + partitionId: Int, + taskInfo: TaskInfo) = { taskSetsByStageIdAndAttempt.getOrElse(stageId, Map()).values.foreach { tsm => - tsm.markPartitionCompleted(partitionId) + tsm.markPartitionCompleted(partitionId, taskInfo) } } http://git-wip-us.apache.org/repos/asf/spark/blob/7be70e29/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index b52e376..c90ae50 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -759,7 +759,7 @@ private[spark] class TaskSetManager( } // There may be multiple tasksets for this stage -- we let all of them know that the partition // was completed. This may result in some of the tasksets getting completed. -sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId) +sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId, info) // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not // "deserialize" the value when holding a lock to avoid blocking other threads. 
So we call @@ -770,9 +770,12 @@ private[spark] class TaskSetManager( maybeFinishTaskSet() } - private[scheduler] def markPartitionCompleted(partitionId: Int): Unit = { + private[scheduler] def markPartitionCompleted(partitionId: Int, taskInfo: TaskInfo): Unit = { partitionToIndex.get(partitionId).foreach { index => if (!successful(index)) { +if (speculationEnabled && !isZombie) { + successfulTaskDurations.insert(taskInfo.duration) +} tasksSuccessful += 1 successful(index) = true if (tasksSuccessful == numTasks) { http://git-wip-us.apache.org/repos/asf/spark/blob/7be70e29/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
spark git commit: [SPARK-24677][CORE] Avoid NoSuchElementException from MedianHeap
Repository: spark Updated Branches: refs/heads/master fc0c8c971 -> c8bee932c [SPARK-24677][CORE] Avoid NoSuchElementException from MedianHeap ## What changes were proposed in this pull request? When speculation is enabled, TaskSetManager#markPartitionCompleted should write successful task duration to MedianHeap, not just increase tasksSuccessful. Otherwise when TaskSetManager#checkSpeculatableTasks,tasksSuccessful non-zero, but MedianHeap is empty. Then throw an exception successfulTaskDurations.median java.util.NoSuchElementException: MedianHeap is empty. Finally led to stopping SparkContext. ## How was this patch tested? TaskSetManagerSuite.scala unit test:[SPARK-24677] MedianHeap should not be empty when speculation is enabled Author: sychen Closes #21656 from cxzl25/fix_MedianHeap_empty. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8bee932 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8bee932 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8bee932 Branch: refs/heads/master Commit: c8bee932cb644627c4049b5a07dd8028968572d9 Parents: fc0c8c9 Author: sychen Authored: Wed Jul 18 13:24:41 2018 -0500 Committer: Thomas Graves Committed: Wed Jul 18 13:24:41 2018 -0500 -- .../spark/scheduler/TaskSchedulerImpl.scala | 7 ++- .../apache/spark/scheduler/TaskSetManager.scala | 7 ++- .../spark/scheduler/TaskSetManagerSuite.scala | 49 3 files changed, 59 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8bee932/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 598b62f..56c0bf6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -697,9 +697,12 @@ 
private[spark] class TaskSchedulerImpl( * do not also submit those same tasks. That also means that a task completion from an earlier * attempt can lead to the entire stage getting marked as successful. */ - private[scheduler] def markPartitionCompletedInAllTaskSets(stageId: Int, partitionId: Int) = { + private[scheduler] def markPartitionCompletedInAllTaskSets( + stageId: Int, + partitionId: Int, + taskInfo: TaskInfo) = { taskSetsByStageIdAndAttempt.getOrElse(stageId, Map()).values.foreach { tsm => - tsm.markPartitionCompleted(partitionId) + tsm.markPartitionCompleted(partitionId, taskInfo) } } http://git-wip-us.apache.org/repos/asf/spark/blob/c8bee932/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index a18c665..6071605 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -758,7 +758,7 @@ private[spark] class TaskSetManager( } // There may be multiple tasksets for this stage -- we let all of them know that the partition // was completed. This may result in some of the tasksets getting completed. -sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId) +sched.markPartitionCompletedInAllTaskSets(stageId, tasks(index).partitionId, info) // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not // "deserialize" the value when holding a lock to avoid blocking other threads. 
So we call @@ -769,9 +769,12 @@ private[spark] class TaskSetManager( maybeFinishTaskSet() } - private[scheduler] def markPartitionCompleted(partitionId: Int): Unit = { + private[scheduler] def markPartitionCompleted(partitionId: Int, taskInfo: TaskInfo): Unit = { partitionToIndex.get(partitionId).foreach { index => if (!successful(index)) { +if (speculationEnabled && !isZombie) { + successfulTaskDurations.insert(taskInfo.duration) +} tasksSuccessful += 1 successful(index) = true if (tasksSuccessful == numTasks) { http://git-wip-us.apache.org/repos/asf/spark/blob/c8bee932/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala -- diff --git
spark git commit: Update for spark 2.2.2 release
Repository: spark Updated Branches: refs/heads/branch-2.2 4bc4ccd63 -> 17db57213 Update for spark 2.2.2 release Release process for Spark 2.2.2 says to update the test. Author: Thomas Graves Closes #21707 from tgravescs/spark222-release. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/17db5721 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/17db5721 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/17db5721 Branch: refs/heads/branch-2.2 Commit: 17db57213aabc13cb59d13f0f570c7539dae Parents: 4bc4ccd Author: Thomas Graves Authored: Mon Jul 16 09:29:20 2018 -0500 Committer: Thomas Graves Committed: Mon Jul 16 09:29:20 2018 -0500 -- .../apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/17db5721/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 313059b..e6a6cac 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -170,7 +170,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { object PROCESS_TABLES extends QueryTest with SQLTestUtils { // Tests the latest version of every release line. - val testingVersions = Seq("2.0.2", "2.1.2", "2.2.1") + val testingVersions = Seq("2.0.2", "2.1.3", "2.2.2") protected var spark: SparkSession = _ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-24610] fix reading small files via wholeTextFiles
Repository: spark Updated Branches: refs/heads/master 9fa4a1ed3 -> 1055c94cd [SPARK-24610] fix reading small files via wholeTextFiles ## What changes were proposed in this pull request? The `WholeTextFileInputFormat` determines the `maxSplitSize` for the file/s being read using the `wholeTextFiles` method. While this works well for large files, for smaller files where the maxSplitSize is smaller than the defaults being used with configs like hive-site.xml or explicitly passed in the form of `mapreduce.input.fileinputformat.split.minsize.per.node` or `mapreduce.input.fileinputformat.split.minsize.per.rack` , it just throws an exception. ```java java.io.IOException: Minimum split size pernode 123456 cannot be larger than maximum split size 9962 at org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:200) at org.apache.spark.rdd.WholeTextFileRDD.getPartitions(WholeTextFileRDD.scala:50) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2096) at org.apache.spark.rdd.RDD.count(RDD.scala:1158) ... 48 elided ``` This change checks the maxSplitSize against the minSplitSizePerNode and minSplitSizePerRack and sets them if `maxSplitSize < minSplitSizePerNode/Rack` ## How was this patch tested? Test manually setting the conf while launching the job and added unit test. Author: Dhruve Ashar Closes #21601 from dhruve/bug/SPARK-24610.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1055c94c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1055c94c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1055c94c Branch: refs/heads/master Commit: 1055c94cdf072bfce5e36bb6552fe9b148bb9d17 Parents: 9fa4a1e Author: Dhruve Ashar Authored: Thu Jul 12 15:36:02 2018 -0500 Committer: Thomas Graves Committed: Thu Jul 12 15:36:02 2018 -0500 -- .../spark/input/WholeTextFileInputFormat.scala | 13 +++ .../input/WholeTextFileInputFormatSuite.scala | 96 2 files changed, 109 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1055c94c/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala -- diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala index f47cd38..04c5c4b 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala @@ -53,6 +53,19 @@ private[spark] class WholeTextFileInputFormat val totalLen = files.map(file => if (file.isDirectory) 0L else file.getLen).sum val maxSplitSize = Math.ceil(totalLen * 1.0 / (if (minPartitions == 0) 1 else minPartitions)).toLong + +// For small files we need to ensure the min split size per node & rack <= maxSplitSize +val config = context.getConfiguration +val minSplitSizePerNode = config.getLong(CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 0L) +val minSplitSizePerRack = config.getLong(CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 0L) + +if (maxSplitSize < minSplitSizePerNode) { + super.setMinSplitSizeNode(maxSplitSize) +} + +if (maxSplitSize < minSplitSizePerRack) { + super.setMinSplitSizeRack(maxSplitSize) +} super.setMaxSplitSize(maxSplitSize) } } 
http://git-wip-us.apache.org/repos/asf/spark/blob/1055c94c/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala new file mode 100644 index 000..817dc08 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You
svn commit: r28003 - /dev/spark/v2.2.2-rc2-docs/
Author: tgraves Date: Mon Jul 9 16:12:21 2018 New Revision: 28003 Log: Removing RC artifacts. Removed: dev/spark/v2.2.2-rc2-docs/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: Adding missing spark 2.2.2 announcement html files
Repository: spark-website Updated Branches: refs/heads/asf-site 179a5897a -> 2b5ba2f62 Adding missing spark 2.2.2 announcement html files Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/2b5ba2f6 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/2b5ba2f6 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/2b5ba2f6 Branch: refs/heads/asf-site Commit: 2b5ba2f6247a97374acc3c800d4a4179a0c0a334 Parents: 179a589 Author: Thomas Graves Authored: Mon Jul 9 11:04:33 2018 -0500 Committer: Thomas Graves Committed: Mon Jul 9 11:04:33 2018 -0500 -- site/news/spark-2-2-2-released.html| 232 +++ site/releases/spark-release-2-2-2.html | 238 site/sitemap.xml | 8 +- 3 files changed, 474 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/2b5ba2f6/site/news/spark-2-2-2-released.html -- diff --git a/site/news/spark-2-2-2-released.html b/site/news/spark-2-2-2-released.html new file mode 100644 index 000..1c46315 --- /dev/null +++ b/site/news/spark-2-2-2-released.html @@ -0,0 +1,232 @@ + + + + + + + + + Spark 2.2.2 released | Apache Spark + + + + + + + + + + + + + + + + + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-32518208-2']); + _gaq.push(['_trackPageview']); + (function() { +var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; +ga.src = ('https:' == document.location.protocol ? 
'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; +var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); + })(); + + + function trackOutboundLink(link, category, action) { +try { + _gaq.push(['_trackEvent', category , action]); +} catch(err){} + +setTimeout(function() { + document.location.href = link.href; +}, 100); + } + + + + + + + + +https://code.jquery.com/jquery.js"> +https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"> + + + + + + + + + + + Lightning-fast unified analytics engine + + + + + + + + + + Toggle navigation + + + + + + + + + + Download + + + Libraries + + + SQL and DataFrames + Spark Streaming + MLlib (machine learning) + GraphX (graph) + + Third-Party Projects + + + + + Documentation + + + Latest Release (Spark 2.3.1) + Older Versions and Other Resources + Frequently Asked Questions + + + Examples + + + Community + + + Mailing Lists Resources + Contributing to Spark + Improvement Proposals (SPIP) + https://issues.apache.org/jira/browse/SPARK;>Issue Tracker + Powered By + Project Committers + Project History + + + + + Developers + + + Useful Developer Tools + Versioning Policy + Release Process + Security + + + + + +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> + Apache Software Foundation + + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security + + + + + + + + + + + + Latest News + + + Spark 2.2.2 released + (Jul 02, 2018) + + Spark 2.1.3 released + (Jun 29, 2018) + + Spark 2.3.1 released + (Jun 08, 2018) + + Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted + (Mar 01, 2018) + + + Archive + + + https://www.apache.org/events/current-event.html;> +https://www.apache.org/events/current-event-234x60.png"/> + + + + +Download Spark + + +Built-in Libraries: 
+ + +SQL and DataFrames +Spark Streaming +MLlib (machine learning) +
[2/3] spark-website git commit: Spark 2.2.2 release announcements
http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/news/spark-2-3-1-released.html -- diff --git a/site/news/spark-2-3-1-released.html b/site/news/spark-2-3-1-released.html index a6ab537..06b718a 100644 --- a/site/news/spark-2-3-1-released.html +++ b/site/news/spark-2-3-1-released.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/news/spark-2.0.0-preview.html -- diff --git a/site/news/spark-2.0.0-preview.html b/site/news/spark-2.0.0-preview.html index e704bab..da54cef 100644 --- a/site/news/spark-2.0.0-preview.html +++ b/site/news/spark-2.0.0-preview.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/news/spark-accepted-into-apache-incubator.html -- diff --git a/site/news/spark-accepted-into-apache-incubator.html b/site/news/spark-accepted-into-apache-incubator.html index 06ebb8d..696c6d0 100644 --- a/site/news/spark-accepted-into-apache-incubator.html +++ b/site/news/spark-accepted-into-apache-incubator.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/news/spark-and-shark-in-the-news.html -- diff --git a/site/news/spark-and-shark-in-the-news.html b/site/news/spark-and-shark-in-the-news.html index 
f3f5f41..328d3c4 100644 --- a/site/news/spark-and-shark-in-the-news.html +++ b/site/news/spark-and-shark-in-the-news.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/news/spark-becomes-tlp.html -- diff --git a/site/news/spark-becomes-tlp.html b/site/news/spark-becomes-tlp.html index 24051c2..981aaa7 100644 --- a/site/news/spark-becomes-tlp.html +++ b/site/news/spark-becomes-tlp.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/news/spark-featured-in-wired.html -- diff --git a/site/news/spark-featured-in-wired.html b/site/news/spark-featured-in-wired.html index 3bd546f..709c5dc 100644 --- a/site/news/spark-featured-in-wired.html +++ b/site/news/spark-featured-in-wired.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive
[1/3] spark-website git commit: Spark 2.2.2 release announcements
Repository: spark-website Updated Branches: refs/heads/asf-site 390743e8a -> 179a5897a http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/releases/spark-release-1-6-2.html -- diff --git a/site/releases/spark-release-1-6-2.html b/site/releases/spark-release-1-6-2.html index 5f459ce..d23f38d 100644 --- a/site/releases/spark-release-1-6-2.html +++ b/site/releases/spark-release-1-6-2.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/releases/spark-release-1-6-3.html -- diff --git a/site/releases/spark-release-1-6-3.html b/site/releases/spark-release-1-6-3.html index 6d9efd9..fb678c2 100644 --- a/site/releases/spark-release-1-6-3.html +++ b/site/releases/spark-release-1-6-3.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/releases/spark-release-2-0-0.html -- diff --git a/site/releases/spark-release-2-0-0.html b/site/releases/spark-release-2-0-0.html index 8d1ef52..40bd9f3 100644 --- a/site/releases/spark-release-2-0-0.html +++ b/site/releases/spark-release-2-0-0.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/releases/spark-release-2-0-1.html -- diff --git a/site/releases/spark-release-2-0-1.html 
b/site/releases/spark-release-2-0-1.html index 6adef9a..1860b0c 100644 --- a/site/releases/spark-release-2-0-1.html +++ b/site/releases/spark-release-2-0-1.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/releases/spark-release-2-0-2.html -- diff --git a/site/releases/spark-release-2-0-2.html b/site/releases/spark-release-2-0-2.html index 275bed6..9d903dd 100644 --- a/site/releases/spark-release-2-0-2.html +++ b/site/releases/spark-release-2-0-2.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/179a5897/site/releases/spark-release-2-1-0.html -- diff --git a/site/releases/spark-release-2-1-0.html b/site/releases/spark-release-2-1-0.html index ddaff2f..bf6980a 100644 --- a/site/releases/spark-release-2-1-0.html +++ b/site/releases/spark-release-2-1-0.html @@ -162,6 +162,9 @@ Latest News + Spark 2.2.2 released + (Jul 02, 2018) + Spark 2.1.3 released (Jun 29, 2018) @@ -171,9 +174,6 @@ Spark+AI Summit (June 4-6th, 2018, San Francisco) agenda posted (Mar 01, 2018) - Spark 2.3.0 released - (Feb 28, 2018) - Archive
[3/3] spark-website git commit: Spark 2.2.2 release announcements
Spark 2.2.2 release announcements 2.2.2 docs look fine, created the announcement and regenerated the docs. Author: Thomas Graves Closes #129 from tgravescs/spark222-announce. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/179a5897 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/179a5897 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/179a5897 Branch: refs/heads/asf-site Commit: 179a5897a6b056ba84b3ef6fefac5cee020214ac Parents: 390743e Author: Thomas Graves Authored: Mon Jul 9 10:29:50 2018 -0500 Committer: Thomas Graves Committed: Mon Jul 9 10:29:50 2018 -0500 -- news/_posts/2018-07-02-spark-2-2-2-released.md | 14 ++ site/committers.html | 6 +++--- site/community.html | 6 +++--- site/contributing.html | 6 +++--- site/developer-tools.html| 6 +++--- site/documentation.html | 6 +++--- site/downloads.html | 6 +++--- site/examples.html | 6 +++--- site/faq.html| 6 +++--- site/graphx/index.html | 6 +++--- site/history.html| 6 +++--- site/improvement-proposals.html | 6 +++--- site/index.html | 6 +++--- site/mailing-lists.html | 6 +++--- site/mllib/index.html| 6 +++--- site/news/amp-camp-2013-registration-ope.html| 6 +++--- site/news/announcing-the-first-spark-summit.html | 6 +++--- site/news/fourth-spark-screencast-published.html | 6 +++--- site/news/index.html | 15 --- site/news/nsdi-paper.html| 6 +++--- site/news/one-month-to-spark-summit-2015.html| 6 +++--- site/news/proposals-open-for-spark-summit-east.html | 6 +++--- .../registration-open-for-spark-summit-east.html | 6 +++--- site/news/run-spark-and-shark-on-amazon-emr.html | 6 +++--- site/news/spark-0-6-1-and-0-5-2-released.html| 6 +++--- site/news/spark-0-6-2-released.html | 6 +++--- site/news/spark-0-7-0-released.html | 6 +++--- site/news/spark-0-7-2-released.html | 6 +++--- site/news/spark-0-7-3-released.html | 6 +++--- site/news/spark-0-8-0-released.html | 6 +++--- 
site/news/spark-0-8-1-released.html | 6 +++--- site/news/spark-0-9-0-released.html | 6 +++--- site/news/spark-0-9-1-released.html | 6 +++--- site/news/spark-0-9-2-released.html | 6 +++--- site/news/spark-1-0-0-released.html | 6 +++--- site/news/spark-1-0-1-released.html | 6 +++--- site/news/spark-1-0-2-released.html | 6 +++--- site/news/spark-1-1-0-released.html | 6 +++--- site/news/spark-1-1-1-released.html | 6 +++--- site/news/spark-1-2-0-released.html | 6 +++--- site/news/spark-1-2-1-released.html | 6 +++--- site/news/spark-1-2-2-released.html | 6 +++--- site/news/spark-1-3-0-released.html | 6 +++--- site/news/spark-1-4-0-released.html | 6 +++--- site/news/spark-1-4-1-released.html | 6 +++--- site/news/spark-1-5-0-released.html | 6 +++--- site/news/spark-1-5-1-released.html | 6 +++--- site/news/spark-1-5-2-released.html | 6 +++--- site/news/spark-1-6-0-released.html | 6 +++--- site/news/spark-1-6-1-released.html | 6 +++--- site/news/spark-1-6-2-released.html | 6 +++--- site/news/spark-1-6-3-released.html | 6 +++--- site/news/spark-2-0-0-released.html | 6 +++--- site/news/spark-2-0-1-released.html | 6 +++--- site/news/spark-2-0-2-released.html | 6 +++--- site/news/spark-2-1-0-released.html | 6 +++--- site/news/spark-2-1-1-released.html | 6 +++--- site/news/spark-2-1-2-released.html | 6 +++--- site/news/spark-2-1-3-released.html | 6 +++--- site/news/spark-2-2-0-released.html | 6 +++--- site/news/spark-2-2-1-released.html | 6 +++--- site/news/spark-2-3-0-released.html | 6 +++--- site/news/spark-2-3-1-released.html | 6 +++--- site/news/spark-2.0.0-preview.html |
spark-website git commit: Empty commit to trigger asf to github sync
Repository: spark-website Updated Branches: refs/heads/asf-site e10014632 -> 390743e8a Empty commit to trigger asf to github sync Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/390743e8 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/390743e8 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/390743e8 Branch: refs/heads/asf-site Commit: 390743e8a1522099931de7c99c1a2aba72852549 Parents: e100146 Author: Thomas Graves Authored: Mon Jul 9 08:42:43 2018 -0500 Committer: Thomas Graves Committed: Mon Jul 9 08:42:43 2018 -0500 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[42/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/intersect.html -- diff --git a/site/docs/2.2.2/api/R/intersect.html b/site/docs/2.2.2/api/R/intersect.html new file mode 100644 index 000..9598e89 --- /dev/null +++ b/site/docs/2.2.2/api/R/intersect.html @@ -0,0 +1,113 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Intersect + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +intersect {SparkR}R Documentation + +Intersect + +Description + +Return a new SparkDataFrame containing rows only in both this SparkDataFrame +and another SparkDataFrame. This is equivalent to INTERSECT in SQL. + + + +Usage + + +intersect(x, y) + +## S4 method for signature 'SparkDataFrame,SparkDataFrame' +intersect(x, y) + + + +Arguments + + +x + +A SparkDataFrame + +y + +A SparkDataFrame + + + + +Value + +A SparkDataFrame containing the result of the intersect. 
+ + + +Note + +intersect since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +coalesce, collect, +colnames, coltypes, +createOrReplaceTempView, +crossJoin, dapplyCollect, +dapply, describe, +dim, distinct, +dropDuplicates, dropna, +drop, dtypes, +except, explain, +filter, first, +gapplyCollect, gapply, +getNumPartitions, group_by, +head, hint, +histogram, insertInto, +isLocal, isStreaming, +join, limit, +merge, mutate, +ncol, nrow, +persist, printSchema, +randomSplit, rbind, +registerTempTable, rename, +repartition, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, take, +toJSON, union, +unpersist, withColumn, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D df1 - read.json(path) +##D df2 - read.json(path2) +##D intersectDF - intersect(df, df2) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/is.nan.html -- diff --git a/site/docs/2.2.2/api/R/is.nan.html b/site/docs/2.2.2/api/R/is.nan.html new file mode 100644 index 000..8adee7f --- /dev/null +++ b/site/docs/2.2.2/api/R/is.nan.html @@ -0,0 +1,76 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: is.nan + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +isnan {SparkR}R Documentation + +is.nan + +Description + +Return true if the column is NaN, alias for isnan + + + +Usage + + +isnan(x) + +## S4 method for signature 'Column' +is.nan(x) + +## S4 method for signature 'Column' +isnan(x) + + + 
+Arguments + + +x + +Column to compute on. + + + + +Note + +is.nan since 2.0.0 + +isnan since 2.0.0 + + + +See Also + +Other normal_funcs: abs, +bitwiseNOT, coalesce, +column, expr, +from_json, greatest, +ifelse, least, +lit, nanvl, +negate, randn, +rand, struct, +to_json, when + + + +Examples + +## Not run: +##D is.nan(df$c) +##D isnan(df$c) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/isActive.html -- diff --git a/site/docs/2.2.2/api/R/isActive.html b/site/docs/2.2.2/api/R/isActive.html new file mode 100644 index 000..518d27c --- /dev/null +++ b/site/docs/2.2.2/api/R/isActive.html @@ -0,0 +1,71 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: isActive + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +isActive {SparkR}R Documentation + +isActive + +Description + +Returns TRUE if this query is actively running. + + + +Usage + + +isActive(x) + +## S4 method for signature 'StreamingQuery' +isActive(x) + + + +Arguments + + +x + +a StreamingQuery. + + + + +Value + +TRUE if query is actively running, FALSE if stopped. + + + +Note + +isActive(StreamingQuery) since 2.2.0 + +experimental + + + +See Also + +Other StreamingQuery methods: awaitTermination, +explain, lastProgress, +queryName,
[50/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/documentation.md -- diff --git a/documentation.md b/documentation.md index 6c587e0..8a9b62d 100644 --- a/documentation.md +++ b/documentation.md @@ -14,6 +14,7 @@ navigation: Spark 2.3.1 Spark 2.3.0 + Spark 2.2.2 Spark 2.2.1 Spark 2.2.0 Spark 2.1.3 http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/js/downloads.js -- diff --git a/js/downloads.js b/js/downloads.js index 6bac01d..3344267 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -31,6 +31,7 @@ var packagesV8 = [hadoop2p7, hadoop2p6, hadoopFree, sources]; addRelease("2.3.1", new Date("06/08/2018"), packagesV8, true, true); addRelease("2.3.0", new Date("02/28/2018"), packagesV8, true, true); +addRelease("2.2.2", new Date("07/02/2018"), packagesV8, true, true); addRelease("2.2.1", new Date("12/01/2017"), packagesV8, true, true); addRelease("2.2.0", new Date("07/11/2017"), packagesV8, true, false); addRelease("2.1.3", new Date("06/29/2018"), packagesV7, true, true); http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/releases/_posts/2018-07-02-spark-release-2-2-2.md -- diff --git a/releases/_posts/2018-07-02-spark-release-2-2-2.md b/releases/_posts/2018-07-02-spark-release-2-2-2.md new file mode 100644 index 000..8992262 --- /dev/null +++ b/releases/_posts/2018-07-02-spark-release-2-2-2.md @@ -0,0 +1,20 @@ +--- +layout: post +title: Spark Release 2.2.2 +categories: [] +tags: [] +status: publish +type: post +published: true +meta: + _edit_last: '4' + _wpas_done_all: '1' +--- + +Spark 2.2.2 is a maintenance release containing stability fixes. This release is based on the branch-2.2 maintenance branch of Spark. We strongly recommend all 2.2.x users to upgrade to this stable release. + +You can consult JIRA for the [detailed changes](https://s.apache.org/spark-2.2.2) + +### Changes of behavior + +We would like to acknowledge all community members for contributing patches to this release. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/README.md -- diff --git a/site/docs/2.2.2/README.md b/site/docs/2.2.2/README.md new file mode 100644 index 000..90e10a1 --- /dev/null +++ b/site/docs/2.2.2/README.md @@ -0,0 +1,73 @@ +Welcome to the Spark documentation! + +This readme will walk you through navigating and building the Spark documentation, which is included +here with the Spark source code. You can also find documentation specific to release versions of +Spark at http://spark.apache.org/documentation.html. + +Read on to learn more about viewing documentation in plain text (i.e., markdown) or building the +documentation yourself. Why build it yourself? So that you have the docs that correspond to +whichever version of Spark you currently have checked out of revision control. + +## Prerequisites +The Spark documentation build uses a number of tools to build HTML docs and API docs in Scala, +Python and R. + +You need to have [Ruby](https://www.ruby-lang.org/en/documentation/installation/) and +[Python](https://docs.python.org/2/using/unix.html#getting-and-installing-the-latest-version-of-python) +installed. Also install the following libraries: +```sh +$ sudo gem install jekyll jekyll-redirect-from pygments.rb +$ sudo pip install Pygments +# Following is needed only for generating API docs +$ sudo pip install sphinx pypandoc +$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "roxygen2", "testthat", "rmarkdown"), repos="http://cran.stat.ucla.edu/")' +``` +(Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0) + +## Generating the Documentation HTML + +We include the Spark documentation as part of the source (as opposed to using a hosted wiki, such as +the github wiki, as the definitive documentation) to enable the documentation to evolve along with +the source code and be captured by revision control (currently git).
This way the code automatically +includes the version of the documentation that is relevant regardless of which version or release +you have checked out or downloaded. + +In this directory you will find textfiles formatted using Markdown, with an ".md" suffix. You can +read those text files directly if you want. Start with index.md. + +Execute `jekyll build` from the `docs/` directory to compile the site. Compiling the site with +Jekyll will create a directory called `_site` containing index.html as well as the rest of the +compiled files. + +$ cd docs +$ jekyll build + +You can modify the default Jekyll build as follows: +```sh +# Skip generating API docs (which takes a while) +$ SKIP_API=1 jekyll
[43/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/gapplyCollect.html -- diff --git a/site/docs/2.2.2/api/R/gapplyCollect.html b/site/docs/2.2.2/api/R/gapplyCollect.html new file mode 100644 index 000..64efe5b --- /dev/null +++ b/site/docs/2.2.2/api/R/gapplyCollect.html @@ -0,0 +1,183 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: gapplyCollect + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +gapplyCollect {SparkR}R Documentation + +gapplyCollect + +Description + +gapplyCollect + +Groups the SparkDataFrame using the specified columns, applies the R function to each +group and collects the result back to R as data.frame. + + + +Usage + + +gapplyCollect(x, ...) + +## S4 method for signature 'GroupedData' +gapplyCollect(x, func) + +## S4 method for signature 'SparkDataFrame' +gapplyCollect(x, cols, func) + + + +Arguments + + +x + +a SparkDataFrame or GroupedData. + +... + +additional argument(s) passed to the method. + +func + +a function to be applied to each group partition specified by grouping +column of the SparkDataFrame. The function func takes as argument +a key - grouping columns and a data frame - a local R data.frame. +The output of func is a local R data.frame. + +cols + +grouping columns. + + + + +Value + +A data.frame. 
+ + + +Note + +gapplyCollect(GroupedData) since 2.0.0 + +gapplyCollect(SparkDataFrame) since 2.0.0 + + + +See Also + +gapply + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +coalesce, collect, +colnames, coltypes, +createOrReplaceTempView, +crossJoin, dapplyCollect, +dapply, describe, +dim, distinct, +dropDuplicates, dropna, +drop, dtypes, +except, explain, +filter, first, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +merge, mutate, +ncol, nrow, +persist, printSchema, +randomSplit, rbind, +registerTempTable, rename, +repartition, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, take, +toJSON, union, +unpersist, withColumn, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D Computes the arithmetic mean of the second column by grouping +##D on the first and third columns. Output the grouping values and the average. +##D +##D df - createDataFrame ( +##D list(list(1L, 1, 1, 0.1), list(1L, 2, 1, 0.2), list(3L, 3, 3, 0.3)), +##D c(a, b, c, d)) +##D +##D result - gapplyCollect( +##D df, +##D c(a, c), +##D function(key, x) { +##D y - data.frame(key, mean(x$b), stringsAsFactors = FALSE) +##D colnames(y) - c(key_a, key_c, mean_b) +##D y +##D }) +##D +##D We can also group the data and afterwards call gapply on GroupedData. 
+##D For Example: +##D gdf - group_by(df, a, c) +##D result - gapplyCollect( +##D gdf, +##D function(key, x) { +##D y - data.frame(key, mean(x$b), stringsAsFactors = FALSE) +##D colnames(y) - c(key_a, key_c, mean_b) +##D y +##D }) +##D +##D Result +##D -- +##D key_a key_c mean_b +##D 3 3 3.0 +##D 1 1 1.5 +##D +##D Fits linear models on iris dataset by grouping on the Species column and +##D using Sepal_Length as a target variable, Sepal_Width, Petal_Length +##D and Petal_Width as training features. +##D +##D df - createDataFrame (iris) +##D result - gapplyCollect( +##D df, +##D df$Species, +##D function(key, x) { +##D m - suppressWarnings(lm(Sepal_Length ~ +##D Sepal_Width + Petal_Length + Petal_Width, x)) +##D data.frame(t(coef(m))) +##D }) +##D +##D Result +##D - +##D Model X.Intercept. Sepal_Width Petal_Length Petal_Width +##D 10.6998830.33033700.9455356-0.1697527 +##D 21.8955400.38685760.9083370-0.6792238 +##D 32.3518900.65483500.2375602 0.2521257 +##D +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/generateAliasesForIntersectedCols.html -- diff --git a/site/docs/2.2.2/api/R/generateAliasesForIntersectedCols.html b/site/docs/2.2.2/api/R/generateAliasesForIntersectedCols.html new file mode 100644 index 000..a0993ce --- /dev/null +++ b/site/docs/2.2.2/api/R/generateAliasesForIntersectedCols.html @@ -0,0 +1,54 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Creates a list of columns by replacing the intersected ones... + + +
[37/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/spark.bisectingKmeans.html -- diff --git a/site/docs/2.2.2/api/R/spark.bisectingKmeans.html b/site/docs/2.2.2/api/R/spark.bisectingKmeans.html new file mode 100644 index 000..43b8cab --- /dev/null +++ b/site/docs/2.2.2/api/R/spark.bisectingKmeans.html @@ -0,0 +1,179 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Bisecting K-Means Clustering Model + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +spark.bisectingKmeans {SparkR}R Documentation + +Bisecting K-Means Clustering Model + +Description + +Fits a bisecting k-means clustering model against a SparkDataFrame. +Users can call summary to print a summary of the fitted model, predict to make +predictions on new data, and write.ml/read.ml to save/load fitted models. + +Get fitted result from a bisecting k-means model. +Note: A saved-loaded model does not support this method. + + + +Usage + + +spark.bisectingKmeans(data, formula, ...) + +## S4 method for signature 'SparkDataFrame,formula' +spark.bisectingKmeans(data, formula, k = 4, + maxIter = 20, seed = NULL, minDivisibleClusterSize = 1) + +## S4 method for signature 'BisectingKMeansModel' +summary(object) + +## S4 method for signature 'BisectingKMeansModel' +predict(object, newData) + +## S4 method for signature 'BisectingKMeansModel' +fitted(object, method = c("centers", + "classes")) + +## S4 method for signature 'BisectingKMeansModel,character' +write.ml(object, path, + overwrite = FALSE) + + + +Arguments + + +data + +a SparkDataFrame for training. + +formula + +a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', '.', ':', '+', and '-'. 
+Note that the response variable of formula is empty in spark.bisectingKmeans. + +... + +additional argument(s) passed to the method. + +k + +the desired number of leaf clusters. Must be > 1. +The actual number could be smaller if there are no divisible leaf clusters. + +maxIter + +maximum iteration number. + +seed + +the random seed. + +minDivisibleClusterSize + +The minimum number of points (if greater than or equal to 1.0) +or the minimum proportion of points (if less than 1.0) of a divisible cluster. +Note that it is an expert parameter. The default value should be good enough +for most cases. + +object + +a fitted bisecting k-means model. + +newData + +a SparkDataFrame for testing. + +method + +type of fitted results, "centers" for cluster centers +or "classes" for assigned classes. + +path + +the directory where the model is saved. + +overwrite + +overwrites or not if the output path already exists. Default is FALSE +which means throw exception if the output path exists. + + + + +Value + +spark.bisectingKmeans returns a fitted bisecting k-means model. + +summary returns summary information of the fitted model, which is a list. +The list includes the model's k (number of cluster centers), +coefficients (model cluster centers), +size (number of data points in each cluster), cluster +(cluster centers of the transformed data; cluster is NULL if is.loaded is TRUE), +and is.loaded (whether the model is loaded from a saved file). + +predict returns the predicted values based on a bisecting k-means model. + +fitted returns a SparkDataFrame containing fitted values.
+ + + +Note + +spark.bisectingKmeans since 2.2.0 + +summary(BisectingKMeansModel) since 2.2.0 + +predict(BisectingKMeansModel) since 2.2.0 + +fitted since 2.2.0 + +write.ml(BisectingKMeansModel, character) since 2.2.0 + + + +See Also + +predict, read.ml, write.ml + + + +Examples + +## Not run: +##D sparkR.session() +##D t <- as.data.frame(Titanic) +##D df <- createDataFrame(t) +##D model <- spark.bisectingKmeans(df, Class ~ Survived, k = 4) +##D summary(model) +##D +##D # get fitted result from a bisecting k-means model +##D fitted.model <- fitted(model, "centers") +##D showDF(fitted.model) +##D +##D # fitted values on training data +##D fitted <- predict(model, df) +##D head(select(fitted, "Class", "prediction")) +##D +##D # save fitted model to input path +##D path <- "path/to/model" +##D write.ml(model, path) +##D +##D # can also read back the saved model and print +##D savedModel <- read.ml(path) +##D summary(savedModel) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/spark.fpGrowth.html -- diff --git a/site/docs/2.2.2/api/R/spark.fpGrowth.html b/site/docs/2.2.2/api/R/spark.fpGrowth.html new file mode 100644 index
[33/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/write.jdbc.html -- diff --git a/site/docs/2.2.2/api/R/write.jdbc.html b/site/docs/2.2.2/api/R/write.jdbc.html new file mode 100644 index 000..b544b36 --- /dev/null +++ b/site/docs/2.2.2/api/R/write.jdbc.html @@ -0,0 +1,143 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Save the content of SparkDataFrame to an external database... + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +write.jdbc {SparkR}R Documentation + +Save the content of SparkDataFrame to an external database table via JDBC. + +Description + +Save the content of the SparkDataFrame to an external database table via JDBC. Additional JDBC +database connection properties can be set (...) + + + +Usage + + +write.jdbc(x, url, tableName, mode = "error", ...) + +## S4 method for signature 'SparkDataFrame,character,character' +write.jdbc(x, url, tableName, + mode = "error", ...) + + + +Arguments + + +x + +a SparkDataFrame. + +url + +JDBC database url of the form jdbc:subprotocol:subname. + +tableName + +the name of the table in the external database. + +mode + +one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default). + +... + +additional JDBC database connection properties. + + + + +Details + +Also, mode is used to specify the behavior of the save operation when +data already exists in the data source. There are four modes: + + + + append: Contents of this SparkDataFrame are expected to be appended to existing data. + + + overwrite: Existing data is expected to be overwritten by the contents of this +SparkDataFrame. + + + error: An exception is expected to be thrown. 
+ + + ignore: The save operation is expected to not save the contents of the SparkDataFrame +and to not change the existing data. + + + + + +Note + +write.jdbc since 2.0.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +coalesce, collect, +colnames, coltypes, +createOrReplaceTempView, +crossJoin, dapplyCollect, +dapply, describe, +dim, distinct, +dropDuplicates, dropna, +drop, dtypes, +except, explain, +filter, first, +gapplyCollect, gapply, +getNumPartitions, group_by, +head, hint, +histogram, insertInto, +intersect, isLocal, +isStreaming, join, +limit, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +sample, saveAsTable, +schema, selectExpr, +select, showDF, +show, storageLevel, +str, subset, +take, toJSON, +union, unpersist, +withColumn, with, +write.df, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D jdbcUrl - jdbc:mysql://localhost:3306/databasename +##D write.jdbc(df, jdbcUrl, table, user = username, password = password) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/write.json.html -- diff --git a/site/docs/2.2.2/api/R/write.json.html b/site/docs/2.2.2/api/R/write.json.html new file mode 100644 index 000..e5e617b --- /dev/null +++ b/site/docs/2.2.2/api/R/write.json.html @@ -0,0 +1,116 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Save the contents of SparkDataFrame as a JSON file + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +write.json 
{SparkR}R Documentation + +Save the contents of SparkDataFrame as a JSON file + +Description + +Save the contents of a SparkDataFrame as a JSON file (http://jsonlines.org/;> +JSON Lines text format or newline-delimited JSON). Files written out +with this method can be read back in as a SparkDataFrame using read.json(). + + + +Usage + + +write.json(x, path, ...) + +## S4 method for signature 'SparkDataFrame,character' +write.json(x, path, mode = "error", ...) + + + +Arguments + + +x + +A SparkDataFrame + +path + +The directory where the file is saved + +... + +additional argument(s) passed to the method. + +mode + +one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default) + + + + +Note + +write.json since 1.6.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame,
[24/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLDAModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLDAModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLDAModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLDAModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLocalLDAModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLocalLDAModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLocalLDAModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclustering%2FLocalLDAModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) +
[36/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/spark.randomForest.html -- diff --git a/site/docs/2.2.2/api/R/spark.randomForest.html b/site/docs/2.2.2/api/R/spark.randomForest.html new file mode 100644 index 000..5d43b44 --- /dev/null +++ b/site/docs/2.2.2/api/R/spark.randomForest.html @@ -0,0 +1,237 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Random Forest Model for Regression and Classification + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +spark.randomForest {SparkR}R Documentation + +Random Forest Model for Regression and Classification + +Description + +spark.randomForest fits a Random Forest Regression model or Classification model on +a SparkDataFrame. Users can call summary to get a summary of the fitted Random Forest +model, predict to make predictions on new data, and write.ml/read.ml to +save/load fitted models. +For more details, see +http://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-regression;> +Random Forest Regression and +http://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-classifier;> +Random Forest Classification + + + +Usage + + +spark.randomForest(data, formula, ...) 
+ +## S4 method for signature 'SparkDataFrame,formula' +spark.randomForest(data, formula, + type = c("regression", "classification"), maxDepth = 5, maxBins = 32, + numTrees = 20, impurity = NULL, featureSubsetStrategy = "auto", + seed = NULL, subsamplingRate = 1, minInstancesPerNode = 1, + minInfoGain = 0, checkpointInterval = 10, maxMemoryInMB = 256, + cacheNodeIds = FALSE) + +## S4 method for signature 'RandomForestRegressionModel' +summary(object) + +## S3 method for class 'summary.RandomForestRegressionModel' +print(x, ...) + +## S4 method for signature 'RandomForestClassificationModel' +summary(object) + +## S3 method for class 'summary.RandomForestClassificationModel' +print(x, ...) + +## S4 method for signature 'RandomForestRegressionModel' +predict(object, newData) + +## S4 method for signature 'RandomForestClassificationModel' +predict(object, newData) + +## S4 method for signature 'RandomForestRegressionModel,character' +write.ml(object, path, + overwrite = FALSE) + +## S4 method for signature 'RandomForestClassificationModel,character' +write.ml(object, path, + overwrite = FALSE) + + + +Arguments + + +data + +a SparkDataFrame for training. + +formula + +a symbolic description of the model to be fitted. Currently only a few formula +operators are supported, including '~', ':', '+', and '-'. + +... + +additional arguments passed to the method. + +type + +type of model, one of regression or classification, to fit + +maxDepth + +Maximum depth of the tree (>= 0). + +maxBins + +Maximum number of bins used for discretizing continuous features and for choosing +how to split on features at each node. More bins give higher granularity. Must be +>= 2 and >= number of categories in any categorical feature. + +numTrees + +Number of trees to train (>= 1). + +impurity + +Criterion used for information gain calculation. +For regression, must be variance. For classification, must be one of +entropy and gini, default is gini. 
+ +featureSubsetStrategy + +The number of features to consider for splits at each tree node. +Supported options: auto, all, onethird, sqrt, log2, (0.0-1.0], [1-n]. + +seed + +integer seed for random number generation. + +subsamplingRate + +Fraction of the training data used for learning each decision tree, in +range (0, 1]. + +minInstancesPerNode + +Minimum number of instances each child must have after split. + +minInfoGain + +Minimum information gain for a split to be considered at a tree node. + +checkpointInterval + +Param for set checkpoint interval (>= 1) or disable checkpoint (-1). + +maxMemoryInMB + +Maximum memory in MB allocated to histogram aggregation. + +cacheNodeIds + +If FALSE, the algorithm will pass trees to executors to match instances with +nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching +can speed up training of deeper trees. Users can set how often should the +cache be checkpointed or disable it by setting checkpointInterval. + +object + +A fitted Random Forest regression model or classification model. + +x + +summary object of Random Forest regression model or classification model +returned by summary. + +newData + +a SparkDataFrame for testing. + +path + +The directory where the model is saved. + +overwrite + +Overwrites or not if the output path already exists. Default is FALSE +which means throw exception if the output path exists. + + + + +Value + +spark.randomForest returns a
[19/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLeastSquaresCostFun.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLeastSquaresCostFun.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLeastSquaresCostFun.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLeastSquaresCostFun.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLinearRegression.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLinearRegression.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLinearRegression.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FLinearRegression.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +
[28/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html -- diff --git a/site/docs/2.2.2/api/java/index.html b/site/docs/2.2.2/api/java/index.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. 
+ + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?constant-values.html -- diff --git a/site/docs/2.2.2/api/java/index.html?constant-values.html b/site/docs/2.2.2/api/java/index.html?constant-values.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?constant-values.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to
[38/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/sd.html -- diff --git a/site/docs/2.2.2/api/R/sd.html b/site/docs/2.2.2/api/R/sd.html new file mode 100644 index 000..16a114a --- /dev/null +++ b/site/docs/2.2.2/api/R/sd.html @@ -0,0 +1,85 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: sd + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +sd {SparkR}R Documentation + +sd + +Description + +Aggregate function: alias for stddev_samp + + + +Usage + + +sd(x, na.rm = FALSE) + +stddev(x) + +## S4 method for signature 'Column' +sd(x) + +## S4 method for signature 'Column' +stddev(x) + + + +Arguments + + +x + +Column to compute on. + +na.rm + +currently not used. + + + + +Note + +sd since 1.6.0 + +stddev since 1.6.0 + + + +See Also + +stddev_pop, stddev_samp + +Other agg_funcs: agg, avg, +countDistinct, count, +first, kurtosis, +last, max, +mean, min, +skewness, stddev_pop, +stddev_samp, sumDistinct, +sum, var_pop, +var_samp, var + + + +Examples + +## Not run: +##D stddev(df$c) +##D select(df, stddev(df$age)) +##D agg(df, sd(df$age)) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/second.html -- diff --git a/site/docs/2.2.2/api/R/second.html b/site/docs/2.2.2/api/R/second.html new file mode 100644 index 000..7be8f0c --- /dev/null +++ b/site/docs/2.2.2/api/R/second.html @@ -0,0 +1,72 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: second + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> 
+https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +second {SparkR}R Documentation + +second + +Description + +Extracts the seconds as an integer from a given date/timestamp/string. + + + +Usage + + +second(x) + +## S4 method for signature 'Column' +second(x) + + + +Arguments + + +x + +Column to compute on. + + + + +Note + +second since 1.5.0 + + + +See Also + +Other datetime_funcs: add_months, +date_add, date_format, +date_sub, datediff, +dayofmonth, dayofyear, +from_unixtime, +from_utc_timestamp, hour, +last_day, minute, +months_between, month, +next_day, quarter, +to_date, to_timestamp, +to_utc_timestamp, +unix_timestamp, weekofyear, +window, year + + + +Examples + +## Not run: second(df$c) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/select.html -- diff --git a/site/docs/2.2.2/api/R/select.html b/site/docs/2.2.2/api/R/select.html new file mode 100644 index 000..9bc6023 --- /dev/null +++ b/site/docs/2.2.2/api/R/select.html @@ -0,0 +1,153 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Select + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +select {SparkR}R Documentation + +Select + +Description + +Selects a set of columns with names or Column expressions. + + + +Usage + + +select(x, col, ...) + +## S4 method for signature 'SparkDataFrame' +x$name + +## S4 replacement method for signature 'SparkDataFrame' +x$name - value + +## S4 method for signature 'SparkDataFrame,character' +select(x, col, ...) + +## S4 method for signature 'SparkDataFrame,Column' +select(x, col, ...) 
+ +## S4 method for signature 'SparkDataFrame,list' +select(x, col) + + + +Arguments + + +x + +a SparkDataFrame. + +col + +a list of columns or single Column or name. + +... + +additional column(s) if only one column is specified in col. +If more than one column is assigned in col, ... +should be left empty. + +name + +name of a Column (without being wrapped by ""). + +value + +a Column or an atomic vector in the length of 1 as literal value, or NULL. +If NULL, the specified Column is dropped. + + + + +Value + +A new SparkDataFrame with selected columns. + + + +Note + +$ since 1.4.0 + +$- since 1.4.0 + +select(SparkDataFrame, character) since 1.4.0 + +select(SparkDataFrame, Column) since 1.4.0 + +select(SparkDataFrame, list) since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg,
[08/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/lib/jquery.js -- diff --git a/site/docs/2.2.2/api/java/lib/jquery.js b/site/docs/2.2.2/api/java/lib/jquery.js new file mode 100644 index 000..bc3fbc8 --- /dev/null +++ b/site/docs/2.2.2/api/java/lib/jquery.js @@ -0,0 +1,2 @@ +/*! jQuery v1.8.2 jquery.com | jquery.org/license */ +(function(a,b){function G(a){var b=F[a]={};return p.each(a.split(s),function(a,c){b[c]=!0}),b}function J(a,c,d){if(d===b&===1){var e="data-"+c.replace(I,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:+d+""===d?+d:H.test(d)?p.parseJSON(d):d}catch(f){}p.data(a,c,d)}else d=b}return d}function K(a){var b;for(b in a){if(b==="data"&(a[b]))continue;if(b!=="toJSON")return!1}return!0}function ba(){return!1}function bb(){return!0}function bh(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function bi(a,b){do a=a[b];while(a&!==1);return a}function bj(a,b,c){b=b||0;if(p.isFunction(b))return p.grep(a,function(a,d){var e=!!b.call(a,d,a);return e===c});if(b.nodeType)return p.grep(a,function(a,d){return a===b===c});if(typeof b=="string"){var d=p.grep(a,function(a){return a.nodeType===1});if(be.test(b))return p.filter(b,d,!c);b=p.filter(b,d)}return p.grep(a,function(a,d){return p.inArray( a,b)>=0===c})}function bk(a){var b=bl.split("|"),c=a.createDocumentFragment();if(c.createElement)while(b.length)c.createElement(b.pop());return c}function bC(a,b){return a.getElementsByTagName(b)[0]||a.appendChild(a.ownerDocument.createElement(b))}function bD(a,b){if(b.nodeType!==1||!p.hasData(a))return;var c,d,e,f=p._data(a),g=p._data(b,f),h=f.events;if(h){delete g.handle,g.events={};for(c in h)for(d=0,e=h[c].length;d").appendTo(e.body),c=b.css("display");b.remove();if(c==="none"||c===""){bI=e.body.appendChild(bI||p.extend(e.createElement("iframe"),{frameBorder:0,width:0,height:0}));if(!bJ||!bI. 
createElement)bJ=(bI.contentWindow||bI.contentDocument).document,bJ.write(""),bJ.close();b=bJ.body.appendChild(bJ.createElement(a)),c=bH(b,"display"),e.body.removeChild(bI)}return bS[a]=c,c}function ci(a,b,c,d){var e;if(p.isArray(b))p.each(b,function(b,e){c||ce.test(a)?d(a,e):ci(a+"["+(typeof e=="object"?b:"")+"]",e,c,d)});else if(!c&(b)==="object")for(e in b)ci(a+"["+e+"]",b[e],c,d);else d(a,b)}function cz(a){return function(b,c){typeof b!="string"&&(c=b,b="*");var d,e,f,g=b.toLowerCase().split(s),h=0,i=g.length;if(p.isFunction(c))for(;h)[^>]*$|#([\w\-]*)$)/,v=/^<(\w+)\s*\/?>(?:<\/\1>|)$/,w=/^[\],:{}\s]*$/,x=/(?:^|:|,)(?:\s*\[)+/g,y=/\\(?:["\\\/bfnrt]|u[\da-fA-F]{4})/g,z=/"[^"\\\r\n]*"|true|false|null|-?(?:\d\d*\.|)\d+(?:[eE][\-+]?\d+|)/g,A=/^-ms-/,B=/-([\da-z])/gi,C=function(a,b){return(b+"").toUpperCase()},D=function(){e.addEventListener?(e.removeEventListener("DOMContentLoaded",D,!1),p.ready()):e.readyState==="complete"&&(e.detachEvent("onreadystatechange",D),p.ready())},E={};p.fn=p.prototype={constructor:p,init :function(a,c,d){var f,g,h,i;if(!a)return this;if(a.nodeType)return this.context=this[0]=a,this.length=1,this;if(typeof a=="string"){a.charAt(0)==="<"&(a.length-1)===">"&>=3?f=[null,a,null]:f=u.exec(a);if(f&&(f[1]||!c)){if(f[1])return c=c instanceof p?c[0]:c,i=c&?c.ownerDocument||c:e,a=p.parseHTML(f[1],i,!0),v.test(f[1])&(c)&(a,c,!0),p.merge(this,a);g=e.getElementById(f[2]);if(g&){if(g.id!==f[2])return d.find(a);this.length=1,this[0]=g}return this.context=e,this.selector=a,this}return!c||c.jquery?(c||d).find(a):this.constructor(c).find(a)}return p.isFunction(a)?d.ready(a):(a.selector!==b&&(this.selector=a.selector,this.context=a.context),p.makeArray(a,this))},selector:"",jquery:"1.8.2",length:0,size:function(){return this.length},toArray:function(){return k.call(this)},get:function(a){return a==null?this.toArray():a<0?this[this.length+a]:this[a]},pushStack:function(a,b,c){var d=p.merge(this.constructor(),a);ret urn 
d.prevObject=this,d.context=this.context,b==="find"?d.selector=this.selector+(this.selector?" ":"")+c:b&&(d.selector=this.selector+"."+b+"("+c+")"),d},each:function(a,b){return p.each(this,a,b)},ready:function(a){return p.ready.promise().done(a),this},eq:function(a){return a=+a,a===-1?this.slice(a):this.slice(a,a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(k.apply(this,arguments),"slice",k.call(arguments).join(","))},map:function(a){return this.pushStack(p.map(this,function(b,c){return a.call(b,c,b)}))},end:function(){return this.prevObject||this.constructor(null)},push:j,sort:[].sort,splice:[].splice},p.fn.init.prototype=p.fn,p.extend=p.fn.extend=function(){var a,c,d,e,f,g,h=arguments[0]||{},i=1,j=arguments.length,k=!1;typeof h=="boolean"&&(k=h,h=arguments[1]||{},i=2),typeof
[12/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FDecisionTreeModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FDecisionTreeModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FDecisionTreeModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FDecisionTreeModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FGradientBoostedTreesModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FGradientBoostedTreesModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FGradientBoostedTreesModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ftree%2Fmodel%2FGradientBoostedTreesModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +
[49/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/00Index.html -- diff --git a/site/docs/2.2.2/api/R/00Index.html b/site/docs/2.2.2/api/R/00Index.html new file mode 100644 index 000..f77a52b --- /dev/null +++ b/site/docs/2.2.2/api/R/00Index.html @@ -0,0 +1,1715 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;> +http://www.w3.org/1999/xhtml;> +R: R Frontend for Apache Spark + + + + R Frontend for Apache Spark +http://stat.ethz.ch/R-manual/R-devel/doc/html/Rlogo.svg; alt="[R logo]" /> + + + +http://stat.ethz.ch/R-manual/R-devel/doc/html/packages.html;>http://stat.ethz.ch/R-manual/R-devel/doc/html/left.jpg; alt="[Up]" /> +http://stat.ethz.ch/R-manual/R-devel/doc/html/index.html;>http://stat.ethz.ch/R-manual/R-devel/doc/html/up.jpg; alt="[Top]" /> +Documentation for package SparkR version 2.2.2 + +DESCRIPTION file. + + +Help Pages + + + +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +Y +misc + + + +-- A -- + + +abs +abs +abs-method +abs +acos +acos +acos-method +acos +add_months +add_months +add_months-method +add_months +AFTSurvivalRegressionModel-class +S4 class that represents a AFTSurvivalRegressionModel +agg +summarize +agg-method +summarize +alias +alias +alias-method +alias +ALSModel-class +S4 class that represents an ALSModel +approxCountDistinct +Returns the approximate number of distinct items in a group +approxCountDistinct-method +Returns the approximate number of distinct items in a group +approxQuantile +Calculates the approximate quantiles of numerical columns of a SparkDataFrame +approxQuantile-method +Calculates the approximate quantiles of numerical columns of a SparkDataFrame +arrange +Arrange Rows by Variables +arrange-method +Arrange Rows by Variables +array_contains +array_contains +array_contains-method +array_contains +as.data.frame +Download data from a SparkDataFrame into a R data.frame +as.data.frame-method +Download data from a SparkDataFrame into a R data.frame +as.DataFrame 
+Create a SparkDataFrame +as.DataFrame.default +Create a SparkDataFrame +asc +A set of operations working with SparkDataFrame columns +ascii +ascii +ascii-method +ascii +asin +asin +asin-method +asin +associationRules-method +FP-growth +atan +atan +atan-method +atan +atan2 +atan2 +atan2-method +atan2 +attach +Attach SparkDataFrame to R search path +attach-method +Attach SparkDataFrame to R search path +avg +avg +avg-method +avg +awaitTermination +awaitTermination +awaitTermination-method +awaitTermination + + +-- B -- + + +base64 +base64 +base64-method +base64 +between +between +between-method +between +bin +bin +bin-method +bin +BisectingKMeansModel-class +S4 class that represents a BisectingKMeansModel +bitwiseNOT +bitwiseNOT +bitwiseNOT-method +bitwiseNOT +bround +bround +bround-method +bround + + +-- C -- + + +cache +Cache +cache-method +Cache +cacheTable +Cache Table +cacheTable.default +Cache Table +cancelJobGroup +Cancel active jobs for the specified group +cancelJobGroup.default +Cancel active jobs for the specified group +cast +Casts the column to a different data type. +cast-method +Casts the column to a different data type. +cbrt +cbrt +cbrt-method +cbrt +ceil +Computes the ceiling of the given value +ceil-method +Computes the ceiling of the given value +ceiling +Computes the ceiling of the given value +ceiling-method +Computes the ceiling of the given value +checkpoint +checkpoint +checkpoint-method +checkpoint +clearCache +Clear Cache +clearCache.default +Clear Cache +clearJobGroup +Clear current job group ID and its description +clearJobGroup.default +Clear current job group ID and its description +coalesce +Coalesce +coalesce-method +Coalesce +collect +Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. +collect-method +Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. 
+colnames +Column Names of SparkDataFrame +colnames-method +Column Names of SparkDataFrame +colnames- +Column Names of SparkDataFrame +colnames--method +Column Names of SparkDataFrame +coltypes +coltypes +coltypes-method +coltypes +coltypes- +coltypes +coltypes--method +coltypes +column +S4 class that represents a SparkDataFrame column +Column-class +S4 class that represents a SparkDataFrame column +column-method +S4 class that represents a SparkDataFrame column +columnfunctions +A set of operations working with SparkDataFrame columns +columns +Column Names of SparkDataFrame +columns-method +Column Names of SparkDataFrame +concat +concat +concat-method +concat +concat_ws +concat_ws +concat_ws-method +concat_ws +contains +A set of operations working with SparkDataFrame columns +conv +conv +conv-method +conv +corr +corr +corr-method +corr +cos +cos +cos-method +cos +cosh +cosh +cosh-method +cosh +count +Count +count-method +Count +count-method +Returns the number of rows in a SparkDataFrame +countDistinct +Count Distinct
[18/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FClassificationModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FClassificationModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FClassificationModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FClassificationModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FLogisticRegressionModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FLogisticRegressionModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FLogisticRegressionModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclassification%2FLogisticRegressionModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep =
[46/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/coalesce.html -- diff --git a/site/docs/2.2.2/api/R/coalesce.html b/site/docs/2.2.2/api/R/coalesce.html new file mode 100644 index 000..f52fce8 --- /dev/null +++ b/site/docs/2.2.2/api/R/coalesce.html @@ -0,0 +1,147 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Coalesce + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +coalesce {SparkR}R Documentation + +Coalesce + +Description + +Returns a new SparkDataFrame that has exactly numPartitions partitions. +This operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 +partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of +the current partitions. If a larger number of partitions is requested, it will stay at the +current number of partitions. + +Returns the first column that is not NA, or NA if all inputs are. + + + +Usage + + +coalesce(x, ...) + +## S4 method for signature 'SparkDataFrame' +coalesce(x, numPartitions) + +## S4 method for signature 'Column' +coalesce(x, ...) + + + +Arguments + + +x + +a Column or a SparkDataFrame. + +... + +additional argument(s). If x is a Column, additional Columns can be optionally +provided. + +numPartitions + +the number of partitions to use. + + + + +Details + +However, if you're doing a drastic coalesce on a SparkDataFrame, e.g. to numPartitions = 1, +this may result in your computation taking place on fewer nodes than +you like (e.g. one node in the case of numPartitions = 1). To avoid this, +call repartition. This will add a shuffle step, but means the +current upstream partitions will be executed in parallel (per whatever +the current partitioning is). 
+ + + +Note + +coalesce(SparkDataFrame) since 2.1.1 + +coalesce(Column) since 2.1.1 + + + +See Also + +repartition + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +collect, colnames, +coltypes, +createOrReplaceTempView, +crossJoin, dapplyCollect, +dapply, describe, +dim, distinct, +dropDuplicates, dropna, +drop, dtypes, +except, explain, +filter, first, +gapplyCollect, gapply, +getNumPartitions, group_by, +head, hint, +histogram, insertInto, +intersect, isLocal, +isStreaming, join, +limit, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +sample, saveAsTable, +schema, selectExpr, +select, showDF, +show, storageLevel, +str, subset, +take, toJSON, +union, unpersist, +withColumn, with, +write.df, write.jdbc, +write.json, write.orc, +write.parquet, write.stream, +write.text + +Other normal_funcs: abs, +bitwiseNOT, column, +expr, from_json, +greatest, ifelse, +isnan, least, +lit, nanvl, +negate, randn, +rand, struct, +to_json, when + + + +Examples + +## Not run: +##D sparkR.session() +##D path - path/to/file.json +##D df - read.json(path) +##D newDF - coalesce(df, 1L) +## End(Not run) +## Not run: coalesce(df$c, df$d, df$e) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/collect.html -- diff --git a/site/docs/2.2.2/api/R/collect.html b/site/docs/2.2.2/api/R/collect.html new file mode 100644 index 000..cbd2ea8 --- /dev/null +++ b/site/docs/2.2.2/api/R/collect.html @@ -0,0 +1,113 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Collects all the elements of a SparkDataFrame and coerces... 
+ + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +collect {SparkR}R Documentation + +Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. + +Description + +Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. + + + +Usage + + +collect(x, ...) + +## S4 method for signature 'SparkDataFrame' +collect(x, stringsAsFactors = FALSE) + + + +Arguments + + +x + +a SparkDataFrame. + +... + +further arguments to be passed to or from other methods. + +stringsAsFactors + +(Optional) a logical indicating whether or not string columns +should be converted to factors. FALSE by default. + + + + +Note + +collect since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame,
[25/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FLogisticRegressionTrainingSummary.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FLogisticRegressionTrainingSummary.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FLogisticRegressionTrainingSummary.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FLogisticRegressionTrainingSummary.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your 
browser. + +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FMultilayerPerceptronClassificationModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FMultilayerPerceptronClassificationModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FMultilayerPerceptronClassificationModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fclassification%2FMultilayerPerceptronClassificationModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +
[09/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FStreamingQueryStatus.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FStreamingQueryStatus.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FStreamingQueryStatus.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FStreamingQueryStatus.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FTrigger.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FTrigger.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FTrigger.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fstreaming%2FTrigger.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' &&
[30/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/constant-values.html -- diff --git a/site/docs/2.2.2/api/java/constant-values.html b/site/docs/2.2.2/api/java/constant-values.html new file mode 100644 index 000..a35ac6d --- /dev/null +++ b/site/docs/2.2.2/api/java/constant-values.html @@ -0,0 +1,242 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Constant Field Values (Spark 2.2.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +NoFrames + + +AllClasses + + + + + + + + + + +Constant Field Values +Contents + +org.apache.* + + + + + +org.apache.* + + + +org.apache.spark.launcher.SparkLauncher + +Modifier and Type +Constant Field +Value + + + + + +publicstaticfinalString +CHILD_CONNECTION_TIMEOUT +"spark.launcher.childConectionTimeout" + + + + +publicstaticfinalString +CHILD_PROCESS_LOGGER_NAME +"spark.launcher.childProcLoggerName" + + + + +publicstaticfinalString +DEPLOY_MODE +"spark.submit.deployMode" + + + + +publicstaticfinalString +DRIVER_EXTRA_CLASSPATH +"spark.driver.extraClassPath" + + + + +publicstaticfinalString +DRIVER_EXTRA_JAVA_OPTIONS +"spark.driver.extraJavaOptions" + + + + +publicstaticfinalString +DRIVER_EXTRA_LIBRARY_PATH +"spark.driver.extraLibraryPath" + + + + +publicstaticfinalString +DRIVER_MEMORY +"spark.driver.memory" + + + + +publicstaticfinalString +EXECUTOR_CORES +"spark.executor.cores" + + + + +publicstaticfinalString +EXECUTOR_EXTRA_CLASSPATH +"spark.executor.extraClassPath" + + + + +publicstaticfinalString +EXECUTOR_EXTRA_JAVA_OPTIONS +"spark.executor.extraJavaOptions" + + + + +publicstaticfinalString +EXECUTOR_EXTRA_LIBRARY_PATH +"spark.executor.extraLibraryPath" + + + + +publicstaticfinalString +EXECUTOR_MEMORY +"spark.executor.memory" + + + + +publicstaticfinalString +NO_RESOURCE +"spark-internal" + + + + +publicstaticfinalString +SPARK_MASTER 
+"spark.master" + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +NoFrames + + +AllClasses + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/deprecated-list.html -- diff --git a/site/docs/2.2.2/api/java/deprecated-list.html b/site/docs/2.2.2/api/java/deprecated-list.html new file mode 100644 index 000..ec6059e --- /dev/null +++ b/site/docs/2.2.2/api/java/deprecated-list.html @@ -0,0 +1,841 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Deprecated List (Spark 2.2.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +NoFrames + + +AllClasses + + + + + + + + + + +Deprecated API +Contents + +Deprecated Interfaces +Deprecated Classes +Deprecated Methods +Deprecated Constructors + + + + + + + + +Deprecated Interfaces + +Interface and Description + + + +org.apache.spark.AccumulableParam +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam +use AccumulatorV2. Since 2.0.0. + + + + + + + + + + + + +Deprecated Classes + +Class and Description + + + +org.apache.spark.Accumulable +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.Accumulator +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam.DoubleAccumulatorParam$ +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam.FloatAccumulatorParam$ +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam.IntAccumulatorParam$ +use AccumulatorV2. Since 2.0.0. + + + +org.apache.spark.AccumulatorParam.LongAccumulatorParam$ +use AccumulatorV2.
[32/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/allclasses-frame.html -- diff --git a/site/docs/2.2.2/api/java/allclasses-frame.html b/site/docs/2.2.2/api/java/allclasses-frame.html new file mode 100644 index 000..97eb314 --- /dev/null +++ b/site/docs/2.2.2/api/java/allclasses-frame.html @@ -0,0 +1,1199 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +All Classes (Spark 2.2.2 JavaDoc) + + + + + +AllClasses + + +AbsoluteError +Accumulable +AccumulableInfo +AccumulableInfo +AccumulableParam +Accumulator +AccumulatorContext +AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$ +AccumulatorParam.FloatAccumulatorParam$ +AccumulatorParam.IntAccumulatorParam$ +AccumulatorParam.LongAccumulatorParam$ +AccumulatorParam.StringAccumulatorParam$ +AccumulatorV2 +AFTAggregator +AFTCostFun +AFTSurvivalRegression +AFTSurvivalRegressionModel +AggregatedDialect +AggregatingEdgeContext +Aggregator +Aggregator +Algo +AllJobsCancelled +AllReceiverIds +ALS +ALS +ALS.InBlock$ +ALS.Rating +ALS.Rating$ +ALS.RatingBlock$ +ALSModel +AnalysisException +And +AnyDataType +ApplicationAttemptInfo +ApplicationEnvironmentInfo +ApplicationInfo +ApplicationsListResource +ApplicationStatus +ApplyInPlace +AreaUnderCurve +ArrayType +AskPermissionToCommitOutput +AssociationRules +AssociationRules +AssociationRules.Rule +AsyncRDDActions +Attribute +AttributeGroup +AttributeKeys +AttributeType +BaseRelation +BaseRRDD +BasicBlockReplicationPolicy +BatchInfo +BatchInfo +BatchStatus +BernoulliCellSampler +BernoulliSampler +Binarizer +BinaryAttribute +BinaryClassificationEvaluator +BinaryClassificationMetrics +BinaryLogisticRegressionSummary +BinaryLogisticRegressionTrainingSummary +BinarySample +BinaryType +BinomialBounds +BisectingKMeans +BisectingKMeans +BisectingKMeansModel +BisectingKMeansModel +BisectingKMeansModel.SaveLoadV1_0$ +BisectingKMeansSummary +BlacklistedExecutor +BLAS +BLAS +BlockId +BlockManagerId +BlockManagerMessages 
+BlockManagerMessages.BlockManagerHeartbeat +BlockManagerMessages.BlockManagerHeartbeat$ +BlockManagerMessages.GetBlockStatus +BlockManagerMessages.GetBlockStatus$ +BlockManagerMessages.GetExecutorEndpointRef +BlockManagerMessages.GetExecutorEndpointRef$ +BlockManagerMessages.GetLocations +BlockManagerMessages.GetLocations$ +BlockManagerMessages.GetLocationsMultipleBlockIds +BlockManagerMessages.GetLocationsMultipleBlockIds$ +BlockManagerMessages.GetMatchingBlockIds +BlockManagerMessages.GetMatchingBlockIds$ +BlockManagerMessages.GetMemoryStatus$ +BlockManagerMessages.GetPeers +BlockManagerMessages.GetPeers$ +BlockManagerMessages.GetStorageStatus$ +BlockManagerMessages.HasCachedBlocks +BlockManagerMessages.HasCachedBlocks$ +BlockManagerMessages.RegisterBlockManager +BlockManagerMessages.RegisterBlockManager$ +BlockManagerMessages.RemoveBlock +BlockManagerMessages.RemoveBlock$ +BlockManagerMessages.RemoveBroadcast +BlockManagerMessages.RemoveBroadcast$ +BlockManagerMessages.RemoveExecutor +BlockManagerMessages.RemoveExecutor$ +BlockManagerMessages.RemoveRdd +BlockManagerMessages.RemoveRdd$ +BlockManagerMessages.RemoveShuffle +BlockManagerMessages.RemoveShuffle$ +BlockManagerMessages.ReplicateBlock +BlockManagerMessages.ReplicateBlock$ +BlockManagerMessages.StopBlockManagerMaster$ +BlockManagerMessages.ToBlockManagerMaster +BlockManagerMessages.ToBlockManagerSlave +BlockManagerMessages.TriggerThreadDump$ +BlockManagerMessages.UpdateBlockInfo +BlockManagerMessages.UpdateBlockInfo$ +BlockMatrix +BlockNotFoundException +BlockReplicationPolicy +BlockReplicationUtils +BlockStatus +BlockUpdatedInfo +BloomFilter +BloomFilter.Version +BooleanParam +BooleanType +BoostingStrategy +BoundedDouble +BreezeUtil +Broadcast +BroadcastBlockId +Broker +BucketedRandomProjectionLSH +BucketedRandomProjectionLSHModel +Bucketizer +BufferReleasingInputStream +BytecodeUtils +ByteType +CalendarIntervalType +Catalog +CatalystScan +CategoricalSplit +CausedBy +CharType +CheckpointReader 
+CheckpointState +ChiSqSelector +ChiSqSelector +ChiSqSelectorModel +ChiSqSelectorModel +ChiSqSelectorModel.SaveLoadV1_0$ +ChiSqTest +ChiSqTest.Method +ChiSqTest.Method$ +ChiSqTest.NullHypothesis$ +ChiSqTestResult +ChiSquareTest +CholeskyDecomposition +ClassificationModel +ClassificationModel +Classifier +CleanAccum +CleanBroadcast +CleanCheckpoint +CleanRDD +CleanShuffle +CleanupTask +CleanupTaskWeakReference +ClosureCleaner +ClusteringSummary +CoarseGrainedClusterMessages +CoarseGrainedClusterMessages.AddWebUIFilter +CoarseGrainedClusterMessages.AddWebUIFilter$ +CoarseGrainedClusterMessages.GetExecutorLossReason +CoarseGrainedClusterMessages.GetExecutorLossReason$ +CoarseGrainedClusterMessages.KillExecutors +CoarseGrainedClusterMessages.KillExecutors$ +CoarseGrainedClusterMessages.KillExecutorsOnHost +CoarseGrainedClusterMessages.KillExecutorsOnHost$ +CoarseGrainedClusterMessages.KillTask +CoarseGrainedClusterMessages.KillTask$
[03/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/ExceptionFailure.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/ExceptionFailure.html b/site/docs/2.2.2/api/java/org/apache/spark/ExceptionFailure.html new file mode 100644 index 000..811877d --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/ExceptionFailure.html @@ -0,0 +1,502 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +ExceptionFailure (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":5,"i2":10,"i3":9,"i4":10,"i5":5,"i6":10,"i7":10,"i8":5,"i9":5,"i10":9,"i11":9,"i12":10,"i13":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class ExceptionFailure + + + +Object + + +org.apache.spark.ExceptionFailure + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class ExceptionFailure +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task failed due to a runtime exception. This is the most common failure case and also captures + user program exceptions. + + stackTrace contains the stack trace of the exception itself. It still exists for backward + compatibility. It's better to use this(e: Throwable, metrics: Option[TaskMetrics]) to + create ExceptionFailure as it will handle the backward compatibility properly. 
+ + fullStackTrace is a better representation of the stack trace because it contains the whole + stack trace including the exception and its causes + + exception is the actual exception that caused the task to fail. It may be None in + the case that the exception is not in fact serializable. If a task fails more than + once (due to retries), exception is that one that caused the last failure. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +ExceptionFailure(StringclassName, +Stringdescription, +StackTraceElement[]stackTrace, +StringfullStackTrace, + scala.Optionorg.apache.spark.ThrowableSerializationWrapperexceptionWrapper, +scala.collection.SeqAccumulableInfoaccumUpdates, +scala.collection.SeqAccumulatorV2?,?accums) + + + + + + + + + +Method Summary + +All MethodsStatic MethodsInstance MethodsAbstract MethodsConcrete Methods + +Modifier and Type +Method and Description + + +scala.collection.SeqAccumulableInfo +accumUpdates() + + +abstract static boolean +canEqual(Objectthat) + + +String +className() + + +static boolean +countTowardsTaskFailures() + + +String +description() + + +abstract static boolean +equals(Objectthat) + + +scala.OptionThrowable +exception() + + +String +fullStackTrace() + + +abstract static int +productArity() + + +abstract static Object +productElement(intn) + + +static scala.collection.IteratorObject +productIterator() + + +static String +productPrefix() + + +StackTraceElement[] +stackTrace() + + +String +toErrorString() +Error message displayed in the web UI. 
+ + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.TaskFailedReason +countTowardsTaskFailures + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +ExceptionFailure +publicExceptionFailure(StringclassName, +Stringdescription, +StackTraceElement[]stackTrace, +StringfullStackTrace, + scala.Optionorg.apache.spark.ThrowableSerializationWrapperexceptionWrapper, +
[31/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/allclasses-noframe.html -- diff --git a/site/docs/2.2.2/api/java/allclasses-noframe.html b/site/docs/2.2.2/api/java/allclasses-noframe.html new file mode 100644 index 000..fbda1f1 --- /dev/null +++ b/site/docs/2.2.2/api/java/allclasses-noframe.html @@ -0,0 +1,1199 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +All Classes (Spark 2.2.2 JavaDoc) + + + + + +AllClasses + + +AbsoluteError +Accumulable +AccumulableInfo +AccumulableInfo +AccumulableParam +Accumulator +AccumulatorContext +AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$ +AccumulatorParam.FloatAccumulatorParam$ +AccumulatorParam.IntAccumulatorParam$ +AccumulatorParam.LongAccumulatorParam$ +AccumulatorParam.StringAccumulatorParam$ +AccumulatorV2 +AFTAggregator +AFTCostFun +AFTSurvivalRegression +AFTSurvivalRegressionModel +AggregatedDialect +AggregatingEdgeContext +Aggregator +Aggregator +Algo +AllJobsCancelled +AllReceiverIds +ALS +ALS +ALS.InBlock$ +ALS.Rating +ALS.Rating$ +ALS.RatingBlock$ +ALSModel +AnalysisException +And +AnyDataType +ApplicationAttemptInfo +ApplicationEnvironmentInfo +ApplicationInfo +ApplicationsListResource +ApplicationStatus +ApplyInPlace +AreaUnderCurve +ArrayType +AskPermissionToCommitOutput +AssociationRules +AssociationRules +AssociationRules.Rule +AsyncRDDActions +Attribute +AttributeGroup +AttributeKeys +AttributeType +BaseRelation +BaseRRDD +BasicBlockReplicationPolicy +BatchInfo +BatchInfo +BatchStatus +BernoulliCellSampler +BernoulliSampler +Binarizer +BinaryAttribute +BinaryClassificationEvaluator +BinaryClassificationMetrics +BinaryLogisticRegressionSummary +BinaryLogisticRegressionTrainingSummary +BinarySample +BinaryType +BinomialBounds +BisectingKMeans +BisectingKMeans +BisectingKMeansModel +BisectingKMeansModel +BisectingKMeansModel.SaveLoadV1_0$ +BisectingKMeansSummary +BlacklistedExecutor +BLAS +BLAS +BlockId +BlockManagerId +BlockManagerMessages 
+BlockManagerMessages.BlockManagerHeartbeat +BlockManagerMessages.BlockManagerHeartbeat$ +BlockManagerMessages.GetBlockStatus +BlockManagerMessages.GetBlockStatus$ +BlockManagerMessages.GetExecutorEndpointRef +BlockManagerMessages.GetExecutorEndpointRef$ +BlockManagerMessages.GetLocations +BlockManagerMessages.GetLocations$ +BlockManagerMessages.GetLocationsMultipleBlockIds +BlockManagerMessages.GetLocationsMultipleBlockIds$ +BlockManagerMessages.GetMatchingBlockIds +BlockManagerMessages.GetMatchingBlockIds$ +BlockManagerMessages.GetMemoryStatus$ +BlockManagerMessages.GetPeers +BlockManagerMessages.GetPeers$ +BlockManagerMessages.GetStorageStatus$ +BlockManagerMessages.HasCachedBlocks +BlockManagerMessages.HasCachedBlocks$ +BlockManagerMessages.RegisterBlockManager +BlockManagerMessages.RegisterBlockManager$ +BlockManagerMessages.RemoveBlock +BlockManagerMessages.RemoveBlock$ +BlockManagerMessages.RemoveBroadcast +BlockManagerMessages.RemoveBroadcast$ +BlockManagerMessages.RemoveExecutor +BlockManagerMessages.RemoveExecutor$ +BlockManagerMessages.RemoveRdd +BlockManagerMessages.RemoveRdd$ +BlockManagerMessages.RemoveShuffle +BlockManagerMessages.RemoveShuffle$ +BlockManagerMessages.ReplicateBlock +BlockManagerMessages.ReplicateBlock$ +BlockManagerMessages.StopBlockManagerMaster$ +BlockManagerMessages.ToBlockManagerMaster +BlockManagerMessages.ToBlockManagerSlave +BlockManagerMessages.TriggerThreadDump$ +BlockManagerMessages.UpdateBlockInfo +BlockManagerMessages.UpdateBlockInfo$ +BlockMatrix +BlockNotFoundException +BlockReplicationPolicy +BlockReplicationUtils +BlockStatus +BlockUpdatedInfo +BloomFilter +BloomFilter.Version +BooleanParam +BooleanType +BoostingStrategy +BoundedDouble +BreezeUtil +Broadcast +BroadcastBlockId +Broker +BucketedRandomProjectionLSH +BucketedRandomProjectionLSHModel +Bucketizer +BufferReleasingInputStream +BytecodeUtils +ByteType +CalendarIntervalType +Catalog +CatalystScan +CategoricalSplit +CausedBy +CharType +CheckpointReader 
+CheckpointState +ChiSqSelector +ChiSqSelector +ChiSqSelectorModel +ChiSqSelectorModel +ChiSqSelectorModel.SaveLoadV1_0$ +ChiSqTest +ChiSqTest.Method +ChiSqTest.Method$ +ChiSqTest.NullHypothesis$ +ChiSqTestResult +ChiSquareTest +CholeskyDecomposition +ClassificationModel +ClassificationModel +Classifier +CleanAccum +CleanBroadcast +CleanCheckpoint +CleanRDD +CleanShuffle +CleanupTask +CleanupTaskWeakReference +ClosureCleaner +ClusteringSummary +CoarseGrainedClusterMessages +CoarseGrainedClusterMessages.AddWebUIFilter +CoarseGrainedClusterMessages.AddWebUIFilter$ +CoarseGrainedClusterMessages.GetExecutorLossReason +CoarseGrainedClusterMessages.GetExecutorLossReason$ +CoarseGrainedClusterMessages.KillExecutors +CoarseGrainedClusterMessages.KillExecutors$ +CoarseGrainedClusterMessages.KillExecutorsOnHost +CoarseGrainedClusterMessages.KillExecutorsOnHost$ +CoarseGrainedClusterMessages.KillTask +CoarseGrainedClusterMessages.KillTask$
[13/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fregression%2Fpackage-tree.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fregression%2Fpackage-tree.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fregression%2Fpackage-tree.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fregression%2Fpackage-tree.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fstat%2FKernelDensity.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fstat%2FKernelDensity.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fstat%2FKernelDensity.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fstat%2FKernelDensity.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) +
[15/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FMatrixImplicits.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FMatrixImplicits.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FMatrixImplicits.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FMatrixImplicits.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FQRDecomposition.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FQRDecomposition.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FQRDecomposition.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Flinalg%2FQRDecomposition.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' &&
[02/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/FetchFailed.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/FetchFailed.html b/site/docs/2.2.2/api/java/org/apache/spark/FetchFailed.html new file mode 100644 index 000..f83bd30 --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/FetchFailed.html @@ -0,0 +1,483 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +FetchFailed (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":5,"i2":10,"i3":5,"i4":10,"i5":10,"i6":5,"i7":5,"i8":9,"i9":9,"i10":10,"i11":10,"i12":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class FetchFailed + + + +Object + + +org.apache.spark.FetchFailed + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class FetchFailed +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task failed to fetch shuffle data from a remote node. Probably means we have lost the remote + executors the task is trying to fetch from, and thus need to rerun the previous stage. 
+ +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +FetchFailed(BlockManagerIdbmAddress, + intshuffleId, + intmapId, + intreduceId, + Stringmessage) + + + + + + + + + +Method Summary + +All MethodsStatic MethodsInstance MethodsAbstract MethodsConcrete Methods + +Modifier and Type +Method and Description + + +BlockManagerId +bmAddress() + + +abstract static boolean +canEqual(Objectthat) + + +boolean +countTowardsTaskFailures() +Fetch failures lead to a different failure handling path: (1) we don't abort the stage after + 4 task failures, instead we immediately go back to the stage which generated the map output, + and regenerate the missing data. + + + +abstract static boolean +equals(Objectthat) + + +int +mapId() + + +String +message() + + +abstract static int +productArity() + + +abstract static Object +productElement(intn) + + +static scala.collection.IteratorObject +productIterator() + + +static String +productPrefix() + + +int +reduceId() + + +int +shuffleId() + + +String +toErrorString() +Error message displayed in the web UI. 
+ + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +FetchFailed +publicFetchFailed(BlockManagerIdbmAddress, + intshuffleId, + intmapId, + intreduceId, + Stringmessage) + + + + + + + + + +Method Detail + + + + + +canEqual +public abstract staticbooleancanEqual(Objectthat) + + + + + + + +equals +public abstract staticbooleanequals(Objectthat) + + + + + + + +productElement +public abstract staticObjectproductElement(intn) + + + + + + + +productArity +public abstract staticintproductArity() + + + + + + + +productIterator +public staticscala.collection.IteratorObjectproductIterator() + + + + + + + +productPrefix +public staticStringproductPrefix() + + + + + + + +bmAddress +publicBlockManagerIdbmAddress() + + + + + + + +shuffleId +publicintshuffleId() + + + + + + + +mapId +publicintmapId() + + + + + + + +reduceId +publicintreduceId() + + + + + + + +message +publicStringmessage() + + + + + + + +toErrorString +publicStringtoErrorString() +Description copied from interface:TaskFailedReason +Error message displayed in the web UI. + +Specified by: +toErrorStringin interfaceTaskFailedReason + + + + + + + + +countTowardsTaskFailures +publicbooleancountTowardsTaskFailures() +Fetch failures
[05/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/Aggregator.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/Aggregator.html b/site/docs/2.2.2/api/java/org/apache/spark/Aggregator.html new file mode 100644 index 000..1ece7c0 --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/Aggregator.html @@ -0,0 +1,436 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Aggregator (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":5,"i1":10,"i2":10,"i3":10,"i4":5,"i5":10,"i6":10,"i7":5,"i8":5,"i9":9,"i10":9}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class AggregatorK,V,C + + + +Object + + +org.apache.spark.AggregatorK,V,C + + + + + + + +All Implemented Interfaces: +java.io.Serializable, scala.Equals, scala.Product + + + +public class AggregatorK,V,C +extends Object +implements scala.Product, scala.Serializable +:: DeveloperApi :: + A set of functions used to aggregate data. + + param: createCombiner function to create the initial value of the aggregation. + param: mergeValue function to merge a new value into the aggregation result. + param: mergeCombiners function to merge outputs from multiple mergeValue function. 
+ +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Aggregator(scala.Function1V,CcreateCombiner, + scala.Function2C,V,CmergeValue, + scala.Function2C,C,CmergeCombiners) + + + + + + + + + +Method Summary + +All MethodsStatic MethodsInstance MethodsAbstract MethodsConcrete Methods + +Modifier and Type +Method and Description + + +abstract static boolean +canEqual(Objectthat) + + +scala.collection.Iteratorscala.Tuple2K,C +combineCombinersByKey(scala.collection.Iterator? extends scala.Product2K,Citer, + TaskContextcontext) + + +scala.collection.Iteratorscala.Tuple2K,C +combineValuesByKey(scala.collection.Iterator? extends scala.Product2K,Viter, + TaskContextcontext) + + +scala.Function1V,C +createCombiner() + + +abstract static boolean +equals(Objectthat) + + +scala.Function2C,C,C +mergeCombiners() + + +scala.Function2C,V,C +mergeValue() + + +abstract static int +productArity() + + +abstract static Object +productElement(intn) + + +static scala.collection.IteratorObject +productIterator() + + +static String +productPrefix() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +Aggregator +publicAggregator(scala.Function1V,CcreateCombiner, + scala.Function2C,V,CmergeValue, + scala.Function2C,C,CmergeCombiners) + + + + + + + + + +Method Detail + + + + + +canEqual +public abstract staticbooleancanEqual(Objectthat) + + + + + + + +equals +public abstract staticbooleanequals(Objectthat) + + + + + + + +productElement +public abstract staticObjectproductElement(intn) + + + + + + + +productArity +public abstract staticintproductArity() + + + + + + + +productIterator +public 
staticscala.collection.IteratorObjectproductIterator() + + + + + + + +productPrefix +public staticStringproductPrefix() + + + + + + + +createCombiner +publicscala.Function1V,CcreateCombiner() + + + + + + + +mergeValue +publicscala.Function2C,V,CmergeValue() + + + + + + + +mergeCombiners +publicscala.Function2C,C,CmergeCombiners() + + + + + + + +combineValuesByKey +publicscala.collection.Iteratorscala.Tuple2K,CcombineValuesByKey(scala.collection.Iterator? extends scala.Product2K,Viter, + TaskContextcontext) + + + + + + + +combineCombinersByKey
[29/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index-all.html -- diff --git a/site/docs/2.2.2/api/java/index-all.html b/site/docs/2.2.2/api/java/index-all.html new file mode 100644 index 000..ede2df8 --- /dev/null +++ b/site/docs/2.2.2/api/java/index-all.html @@ -0,0 +1,48954 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Index (Spark 2.2.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +NoFrames + + +AllClasses + + + + + + + + + +$ABCDEFGHIJKLMNOPQRSTUVWXYZ_ + + +$ + +$colon$bslash(B, Function2A, B, B) - Static method in class org.apache.spark.sql.types.StructType + +$colon$plus(B, CanBuildFromRepr, B, That) - Static method in class org.apache.spark.sql.types.StructType + +$div$colon(B, Function2B, A, B) - Static method in class org.apache.spark.sql.types.StructType + +$greater(A) - Static method in class org.apache.spark.sql.types.Decimal + +$greater(A) - Static method in class org.apache.spark.storage.RDDInfo + +$greater$eq(A) - Static method in class org.apache.spark.sql.types.Decimal + +$greater$eq(A) - Static method in class org.apache.spark.storage.RDDInfo + +$less(A) - Static method in class org.apache.spark.sql.types.Decimal + +$less(A) - Static method in class org.apache.spark.storage.RDDInfo + +$less$eq(A) - Static method in class org.apache.spark.sql.types.Decimal + +$less$eq(A) - Static method in class org.apache.spark.storage.RDDInfo + +$minus$greater(T) - Static method in class org.apache.spark.ml.param.DoubleParam + +$minus$greater(T) - Static method in class org.apache.spark.ml.param.FloatParam + +$plus$colon(B, CanBuildFromRepr, B, That) - Static method in class org.apache.spark.sql.types.StructType + +$plus$eq(T) - Static method in class org.apache.spark.Accumulator + +Deprecated. 
+ +$plus$plus(RDDT) - Static method in class org.apache.spark.api.r.RRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.graphx.EdgeRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.graphx.impl.EdgeRDDImpl + +$plus$plus(RDDT) - Static method in class org.apache.spark.graphx.impl.VertexRDDImpl + +$plus$plus(RDDT) - Static method in class org.apache.spark.graphx.VertexRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.rdd.HadoopRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.rdd.JdbcRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.rdd.NewHadoopRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.rdd.PartitionPruningRDD + +$plus$plus(RDDT) - Static method in class org.apache.spark.rdd.UnionRDD + +$plus$plus(GenTraversableOnceB, CanBuildFromRepr, B, That) - Static method in class org.apache.spark.sql.types.StructType + +$plus$plus$colon(TraversableOnceB, CanBuildFromRepr, B, That) - Static method in class org.apache.spark.sql.types.StructType + +$plus$plus$colon(TraversableB, CanBuildFromRepr, B, That) - Static method in class org.apache.spark.sql.types.StructType + +$plus$plus$eq(R) - Static method in class org.apache.spark.Accumulator + +Deprecated. + + + + + +A + +abortJob(JobContext) - Method in class org.apache.spark.internal.io.FileCommitProtocol + +Aborts a job after the writes fail. + +abortJob(JobContext) - Method in class org.apache.spark.internal.io.HadoopMapReduceCommitProtocol + +abortTask(TaskAttemptContext) - Method in class org.apache.spark.internal.io.FileCommitProtocol + +Aborts a task after the writes have failed. + +abortTask(TaskAttemptContext) - Method in class org.apache.spark.internal.io.HadoopMapReduceCommitProtocol + +abs(Column) - Static method in class org.apache.spark.sql.functions + +Computes the absolute value. 
+ +abs() - Method in class org.apache.spark.sql.types.Decimal + +absent() - Static method in class org.apache.spark.api.java.Optional + +AbsoluteError - Class in org.apache.spark.mllib.tree.loss + +:: DeveloperApi :: + Class for absolute error loss calculation (for regression). + +AbsoluteError() - Constructor for class org.apache.spark.mllib.tree.loss.AbsoluteError + +accept(Parsers) - Static method in class org.apache.spark.ml.feature.RFormulaParser + +accept(ES, Function1ES, ListObject) - Static method in class org.apache.spark.ml.feature.RFormulaParser + +accept(String, PartialFunctionObject, U) - Static method in class org.apache.spark.ml.feature.RFormulaParser +
[10/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fjavalang%2Ftyped.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fjavalang%2Ftyped.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fjavalang%2Ftyped.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fjavalang%2Ftyped.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fscalalang%2Ftyped.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fscalalang%2Ftyped.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fscalalang%2Ftyped.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2Fexpressions%2Fscalalang%2Ftyped.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true;
[35/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/structField.html -- diff --git a/site/docs/2.2.2/api/R/structField.html b/site/docs/2.2.2/api/R/structField.html new file mode 100644 index 000..24f5e24 --- /dev/null +++ b/site/docs/2.2.2/api/R/structField.html @@ -0,0 +1,83 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: structField + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +structField {SparkR}R Documentation + +structField + +Description + +Create a structField object that contains the metadata for a single field in a schema. + + + +Usage + + +structField(x, ...) + +## S3 method for class 'jobj' +structField(x, ...) + +## S3 method for class 'character' +structField(x, type, nullable = TRUE, ...) + + + +Arguments + + +x + +the name of the field. + +... + +additional argument(s) passed to the method. + +type + +The data type of the field + +nullable + +A logical vector indicating whether or not the field is nullable + + + + +Value + +A structField object. 
+ + + +Note + +structField since 1.4.0 + + + +Examples + +## Not run: +##D field1 - structField(a, integer) +##D field2 - structField(c, string) +##D field3 - structField(avg, double) +##D schema - structType(field1, field2, field3) +##D df1 - gapply(df, list(a, c), +##D function(key, x) { y - data.frame(key, mean(x$b), stringsAsFactors = FALSE) }, +##D schema) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/structType.html -- diff --git a/site/docs/2.2.2/api/R/structType.html b/site/docs/2.2.2/api/R/structType.html new file mode 100644 index 000..b33a0e6 --- /dev/null +++ b/site/docs/2.2.2/api/R/structType.html @@ -0,0 +1,74 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: structType + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +structType {SparkR}R Documentation + +structType + +Description + +Create a structType object that contains the metadata for a SparkDataFrame. Intended for +use with createDataFrame and toDF. + + + +Usage + + +structType(x, ...) + +## S3 method for class 'jobj' +structType(x, ...) + +## S3 method for class 'structField' +structType(x, ...) + + + +Arguments + + +x + +a structField object (created with the field() function) + +... 
+ +additional structField objects + + + + +Value + +a structType object + + + +Note + +structType since 1.4.0 + + + +Examples + +## Not run: +##D schema - structType(structField(a, integer), structField(c, string), +##D structField(avg, double)) +##D df1 - gapply(df, list(a, c), +##D function(key, x) { y - data.frame(key, mean(x$b), stringsAsFactors = FALSE) }, +##D schema) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/subset.html -- diff --git a/site/docs/2.2.2/api/R/subset.html b/site/docs/2.2.2/api/R/subset.html new file mode 100644 index 000..8c7ed90 --- /dev/null +++ b/site/docs/2.2.2/api/R/subset.html @@ -0,0 +1,164 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Subset + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +subset {SparkR}R Documentation + +Subset + +Description + +Return subsets of SparkDataFrame according to given conditions + + + +Usage + + +subset(x, ...) + +## S4 method for signature 'SparkDataFrame,numericOrcharacter' +x[[i]] + +## S4 replacement method for signature 'SparkDataFrame,numericOrcharacter' +x[[i]] - value + +## S4 method for signature 'SparkDataFrame' +x[i, j, ..., drop = F] + +## S4 method for signature 'SparkDataFrame' +subset(x, subset, select, drop = F, ...) + + + +Arguments + + +x + +a SparkDataFrame. + +... + +currently not used. + +i, subset + +(Optional) a logical expression to filter on rows. +For extract operator [[ and replacement operator [[-, the
[04/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/CleanShuffle.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/CleanShuffle.html b/site/docs/2.2.2/api/java/org/apache/spark/CleanShuffle.html new file mode 100644 index 000..5ecf0da --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/CleanShuffle.html @@ -0,0 +1,370 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +CleanShuffle (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":5,"i1":5,"i2":5,"i3":5,"i4":9,"i5":9,"i6":10}; +var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class CleanShuffle + + + +Object + + +org.apache.spark.CleanShuffle + + + + + + + +All Implemented Interfaces: +java.io.Serializable, CleanupTask, scala.Equals, scala.Product + + + +public class CleanShuffle +extends Object +implements CleanupTask, scala.Product, scala.Serializable + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +CleanShuffle(intshuffleId) + + + + + + + + + +Method Summary + +All MethodsStatic MethodsInstance MethodsAbstract MethodsConcrete Methods + +Modifier and Type +Method and Description + + +abstract static boolean +canEqual(Objectthat) + + +abstract static boolean +equals(Objectthat) + + +abstract static int +productArity() + + +abstract static Object +productElement(intn) + + +static 
scala.collection.IteratorObject +productIterator() + + +static String +productPrefix() + + +int +shuffleId() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +CleanShuffle +publicCleanShuffle(intshuffleId) + + + + + + + + + +Method Detail + + + + + +canEqual +public abstract staticbooleancanEqual(Objectthat) + + + + + + + +equals +public abstract staticbooleanequals(Objectthat) + + + + + + + +productElement +public abstract staticObjectproductElement(intn) + + + + + + + +productArity +public abstract staticintproductArity() + + + + + + + +productIterator +public staticscala.collection.IteratorObjectproductIterator() + + + + + + + +productPrefix +public staticStringproductPrefix() + + + + + + + +shuffleId +publicintshuffleId() + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/CleanupTask.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/CleanupTask.html b/site/docs/2.2.2/api/java/org/apache/spark/CleanupTask.html new file mode 100644 index 000..6f63401 --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/CleanupTask.html @@ -0,0 +1,170 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +CleanupTask (Spark 2.2.2 JavaDoc) + + + + + + + + +JavaScript is disabled on your browser. 
+ + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + +
[45/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/crossJoin.html -- diff --git a/site/docs/2.2.2/api/R/crossJoin.html b/site/docs/2.2.2/api/R/crossJoin.html new file mode 100644 index 000..b0939c9 --- /dev/null +++ b/site/docs/2.2.2/api/R/crossJoin.html @@ -0,0 +1,112 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: CrossJoin + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +crossJoin {SparkR}R Documentation + +CrossJoin + +Description + +Returns Cartesian Product on two SparkDataFrames. + + + +Usage + + +## S4 method for signature 'SparkDataFrame,SparkDataFrame' +crossJoin(x, y) + + + +Arguments + + +x + +A SparkDataFrame + +y + +A SparkDataFrame + + + + +Value + +A SparkDataFrame containing the result of the join operation. 
+ + + +Note + +crossJoin since 2.1.0 + + + +See Also + +merge join + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +coalesce, collect, +colnames, coltypes, +createOrReplaceTempView, +dapplyCollect, dapply, +describe, dim, +distinct, dropDuplicates, +dropna, drop, +dtypes, except, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +merge, mutate, +ncol, nrow, +persist, printSchema, +randomSplit, rbind, +registerTempTable, rename, +repartition, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, take, +toJSON, union, +unpersist, withColumn, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D df1 - read.json(path) +##D df2 - read.json(path2) +##D crossJoin(df1, df2) # Performs a Cartesian +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/crosstab.html -- diff --git a/site/docs/2.2.2/api/R/crosstab.html b/site/docs/2.2.2/api/R/crosstab.html new file mode 100644 index 000..c4e0894 --- /dev/null +++ b/site/docs/2.2.2/api/R/crosstab.html @@ -0,0 +1,82 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Computes a pair-wise frequency table of the given columns + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +crosstab {SparkR}R Documentation + +Computes a pair-wise frequency table of the given columns + +Description + +Computes a pair-wise frequency table of the given 
columns. Also known as a contingency +table. The number of distinct values for each column should be less than 1e4. At most 1e6 +non-zero pair frequencies will be returned. + + + +Usage + + +## S4 method for signature 'SparkDataFrame,character,character' +crosstab(x, col1, col2) + + + +Arguments + + +x + +a SparkDataFrame + +col1 + +name of the first column. Distinct items will make the first item of each row. + +col2 + +name of the second column. Distinct items will make the column names of the output. + + + + +Value + +a local R data.frame representing the contingency table. The first column of each row +will be the distinct values of col1 and the column names will be the distinct values +of col2. The name of the first column will be col1_col2. Pairs +that have no occurrences will have zero as their counts. + + + +Note + +crosstab since 1.5.0 + + + +See Also + +Other stat functions: approxQuantile, +corr, cov, +freqItems, sampleBy + + + +Examples + +## Not run: +##D df - read.json(/path/to/file.json) +##D ct - crosstab(df, title, gender) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/cume_dist.html -- diff --git a/site/docs/2.2.2/api/R/cume_dist.html b/site/docs/2.2.2/api/R/cume_dist.html new file mode 100644 index 000..96afaa1 --- /dev/null +++ b/site/docs/2.2.2/api/R/cume_dist.html @@ -0,0 +1,77 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: cume_dist + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;>
[14/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FLeastSquaresGradient.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FLeastSquaresGradient.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FLeastSquaresGradient.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FLeastSquaresGradient.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FOptimizer.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FOptimizer.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FOptimizer.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Foptimization%2FOptimizer.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot
[34/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/translate.html -- diff --git a/site/docs/2.2.2/api/R/translate.html b/site/docs/2.2.2/api/R/translate.html new file mode 100644 index 000..f7102a4 --- /dev/null +++ b/site/docs/2.2.2/api/R/translate.html @@ -0,0 +1,87 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: translate + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +translate {SparkR}R Documentation + +translate + +Description + +Translate any character in the src by a character in replaceString. +The characters in replaceString is corresponding to the characters in matchingString. +The translate will happen when any character in the string matching with the character +in the matchingString. + + + +Usage + + +translate(x, matchingString, replaceString) + +## S4 method for signature 'Column,character,character' +translate(x, matchingString, + replaceString) + + + +Arguments + + +x + +a string Column. + +matchingString + +a source string where each character will be translated. + +replaceString + +a target string where each matchingString character will +be replaced by the character in replaceString +at the same location, if any. 
+ + + + +Note + +translate since 1.5.0 + + + +See Also + +Other string_funcs: ascii, +base64, concat_ws, +concat, decode, +encode, format_number, +format_string, initcap, +instr, length, +levenshtein, locate, +lower, lpad, +ltrim, regexp_extract, +regexp_replace, reverse, +rpad, rtrim, +soundex, substring_index, +trim, unbase64, +upper + + + +Examples + +## Not run: translate(df$c, rnlt, 123) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/trim.html -- diff --git a/site/docs/2.2.2/api/R/trim.html b/site/docs/2.2.2/api/R/trim.html new file mode 100644 index 000..4d78065 --- /dev/null +++ b/site/docs/2.2.2/api/R/trim.html @@ -0,0 +1,73 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: trim + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +trim {SparkR}R Documentation + +trim + +Description + +Trim the spaces from both ends for the specified string column. + + + +Usage + + +trim(x) + +## S4 method for signature 'Column' +trim(x) + + + +Arguments + + +x + +Column to compute on. 
+ + + + +Note + +trim since 1.5.0 + + + +See Also + +Other string_funcs: ascii, +base64, concat_ws, +concat, decode, +encode, format_number, +format_string, initcap, +instr, length, +levenshtein, locate, +lower, lpad, +ltrim, regexp_extract, +regexp_replace, reverse, +rpad, rtrim, +soundex, substring_index, +translate, unbase64, +upper + + + +Examples + +## Not run: trim(df$c) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/unbase64.html -- diff --git a/site/docs/2.2.2/api/R/unbase64.html b/site/docs/2.2.2/api/R/unbase64.html new file mode 100644 index 000..8fac795 --- /dev/null +++ b/site/docs/2.2.2/api/R/unbase64.html @@ -0,0 +1,74 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: unbase64 + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +unbase64 {SparkR}R Documentation + +unbase64 + +Description + +Decodes a BASE64 encoded string column and returns it as a binary column. +This is the reverse of base64. + + + +Usage + + +unbase64(x) + +## S4 method for signature 'Column' +unbase64(x) + + + +Arguments + + +x + +Column to compute on. + + + + +Note + +unbase64 since 1.5.0 + + + +See Also + +Other string_funcs: ascii, +base64, concat_ws, +concat, decode, +encode, format_number, +format_string, initcap, +instr, length, +levenshtein, locate, +lower, lpad, +ltrim, regexp_extract, +regexp_replace, reverse, +rpad, rtrim, +soundex, substring_index, +translate, trim, +upper + + + +Examples + +## Not run: unbase64(df$c) + + + +[Package SparkR version 2.2.2 Index] +
[07/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/index.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/index.html b/site/docs/2.2.2/api/java/org/apache/index.html new file mode 100644 index 000..ac53c53 --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/index.html @@ -0,0 +1,8 @@ +dist - Revision 27912: /dev/spark/v2.2.2-rc2-docs/_site/api/java/org/apache + + dist - Revision 27912: /dev/spark/v2.2.2-rc2-docs/_site/api/java/org/apache + + .. + spark/ + + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/Accumulable.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/Accumulable.html b/site/docs/2.2.2/api/java/org/apache/spark/Accumulable.html new file mode 100644 index 000..37c8d85 --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/Accumulable.html @@ -0,0 +1,489 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Accumulable (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":42,"i1":42,"i2":42,"i3":42,"i4":42,"i5":42,"i6":42,"i7":42,"i8":42}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class AccumulableR,T + + + +Object + + +org.apache.spark.AccumulableR,T + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + +Direct Known Subclasses: +Accumulator + + +Deprecated. +use AccumulatorV2. Since 2.0.0. 
+ + +public class AccumulableR,T +extends Object +implements java.io.Serializable +A data type that can be accumulated, i.e. has a commutative and associative "add" operation, + but where the result type, R, may be different from the element type being added, T. + + You must define how to add data, and how to merge two of these together. For some data types, + such as a counter, these might be the same operation. In that case, you can use the simpler + Accumulator. They won't always be the same, though -- e.g., imagine you are + accumulating a set. You will add items to the set, and you will union two sets together. + + Operations are not thread-safe. + + param: id ID of this accumulator; for internal use only. + param: initialValue initial value of accumulator + param: param helper object defining how to add elements of type R and T + param: name human-readable name for use in Spark's web UI + param: countFailedValues whether to accumulate values from failed tasks. This is set to true + for system and time metrics like serialization time or bytes spilled, + and false for things with absolute values like number of input rows. + This should be used for internal metrics only. + +See Also: +Serialized Form + + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Accumulable(RinitialValue, + AccumulableParamR,Tparam) +Deprecated. + + + + + + + + + + +Method Summary + +All MethodsInstance MethodsConcrete MethodsDeprecated Methods + +Modifier and Type +Method and Description + + +void +add(Tterm) +Deprecated. +Add more data to this accumulator / accumulable + + + +long +id() +Deprecated. + + + +R +localValue() +Deprecated. +Get the current value of this accumulator from within a task. + + + +void +merge(Rterm) +Deprecated. +Merge two accumulable objects together + + + +scala.OptionString +name() +Deprecated. + + + +void +setValue(RnewValue) +Deprecated. +Set the accumulator's value. + + + +String +toString() +Deprecated. 
+ + + +R +value() +Deprecated. +Access the accumulator's current value; only allowed on driver. + + + +R +zero() +Deprecated. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + + + +Accumulable +publicAccumulable(RinitialValue, +
[39/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/read.parquet.html -- diff --git a/site/docs/2.2.2/api/R/read.parquet.html b/site/docs/2.2.2/api/R/read.parquet.html new file mode 100644 index 000..de9ac17 --- /dev/null +++ b/site/docs/2.2.2/api/R/read.parquet.html @@ -0,0 +1,55 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Create a SparkDataFrame from a Parquet file. + + + + +read.parquet {SparkR}R Documentation + +Create a SparkDataFrame from a Parquet file. + +Description + +Loads a Parquet file, returning the result as a SparkDataFrame. + + + +Usage + + +## Default S3 method: +read.parquet(path, ...) + +## Default S3 method: +parquetFile(...) + + + +Arguments + + +path + +path of file to read. A vector of multiple paths is allowed. + +... + +argument(s) passed to the method. + + + + +Value + +SparkDataFrame + + + +Note + +read.parquet since 1.6.0 + +parquetFile since 1.4.0 + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/read.stream.html -- diff --git a/site/docs/2.2.2/api/R/read.stream.html b/site/docs/2.2.2/api/R/read.stream.html new file mode 100644 index 000..98f9a27 --- /dev/null +++ b/site/docs/2.2.2/api/R/read.stream.html @@ -0,0 +1,89 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Load a streaming SparkDataFrame + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +read.stream {SparkR}R Documentation + +Load a streaming SparkDataFrame + +Description + +Returns the dataset in a data source as a SparkDataFrame + + + +Usage + + +read.stream(source = NULL, schema = NULL, ...) 
+ + + +Arguments + + +source + +The name of external data source + +schema + +The data schema defined in structType, this is required for file-based streaming +data source + +... + +additional external data source specific named options, for instance path for +file-based streaming data source + + + + +Details + +The data source is specified by the source and a set of options(...). +If source is not specified, the default data source configured by +spark.sql.sources.default will be used. + + + +Value + +SparkDataFrame + + + +Note + +read.stream since 2.2.0 + +experimental + + + +See Also + +write.stream + + + +Examples + +## Not run: +##D sparkR.session() +##D df - read.stream(socket, host = localhost, port = ) +##D q - write.stream(df, text, path = /home/user/out, checkpointLocation = /home/user/cp) +##D +##D df - read.stream(json, path = jsonDir, schema = schema, maxFilesPerTrigger = 1) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/read.text.html -- diff --git a/site/docs/2.2.2/api/R/read.text.html b/site/docs/2.2.2/api/R/read.text.html new file mode 100644 index 000..708a0fe --- /dev/null +++ b/site/docs/2.2.2/api/R/read.text.html @@ -0,0 +1,74 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Create a SparkDataFrame from a text file. + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +read.text {SparkR}R Documentation + +Create a SparkDataFrame from a text file. + +Description + +Loads text files and returns a SparkDataFrame whose schema starts with +a string column named value, and followed by partitioned columns if +there are any. + + + +Usage + + +## Default S3 method: +read.text(path, ...) 
+ + + +Arguments + + +path + +Path of file to read. A vector of multiple paths is allowed. + +... + +additional external data source specific named properties. + + + + +Details + +Each line in the text file is a new row in the resulting SparkDataFrame. + + + +Value + +SparkDataFrame + + + +Note + +read.text since 1.6.1 + + + +Examples + +## Not run: +##D sparkR.session() +##D path - path/to/file.txt +##D df - read.text(path) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/recoverPartitions.html -- diff --git a/site/docs/2.2.2/api/R/recoverPartitions.html b/site/docs/2.2.2/api/R/recoverPartitions.html new
[47/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/arrange.html -- diff --git a/site/docs/2.2.2/api/R/arrange.html b/site/docs/2.2.2/api/R/arrange.html new file mode 100644 index 000..611e11b --- /dev/null +++ b/site/docs/2.2.2/api/R/arrange.html @@ -0,0 +1,134 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Arrange Rows by Variables + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +arrange {SparkR}R Documentation + +Arrange Rows by Variables + +Description + +Sort a SparkDataFrame by the specified column(s). + + + +Usage + + +arrange(x, col, ...) + +## S4 method for signature 'SparkDataFrame,Column' +arrange(x, col, ...) + +## S4 method for signature 'SparkDataFrame,character' +arrange(x, col, ..., decreasing = FALSE) + +## S4 method for signature 'SparkDataFrame,characterOrColumn' +orderBy(x, col, ...) + + + +Arguments + + +x + +a SparkDataFrame to be sorted. + +col + +a character or Column object indicating the fields to sort on + +... + +additional sorting fields + +decreasing + +a logical argument indicating sorting order for columns when +a character vector is specified for col + + + + +Value + +A SparkDataFrame where all elements are sorted. 
+ + + +Note + +arrange(SparkDataFrame, Column) since 1.4.0 + +arrange(SparkDataFrame, character) since 1.4.0 + +orderBy(SparkDataFrame, characterOrColumn) since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +coalesce, collect, +colnames, coltypes, +createOrReplaceTempView, +crossJoin, dapplyCollect, +dapply, describe, +dim, distinct, +dropDuplicates, dropna, +drop, dtypes, +except, explain, +filter, first, +gapplyCollect, gapply, +getNumPartitions, group_by, +head, hint, +histogram, insertInto, +intersect, isLocal, +isStreaming, join, +limit, merge, +mutate, ncol, +nrow, persist, +printSchema, randomSplit, +rbind, registerTempTable, +rename, repartition, +sample, saveAsTable, +schema, selectExpr, +select, showDF, +show, storageLevel, +str, subset, +take, toJSON, +union, unpersist, +withColumn, with, +write.df, write.jdbc, +write.json, write.orc, +write.parquet, write.stream, +write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D path - path/to/file.json +##D df - read.json(path) +##D arrange(df, df$col1) +##D arrange(df, asc(df$col1), desc(abs(df$col2))) +##D arrange(df, col1, decreasing = TRUE) +##D arrange(df, col1, col2, decreasing = c(TRUE, FALSE)) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/array_contains.html -- diff --git a/site/docs/2.2.2/api/R/array_contains.html b/site/docs/2.2.2/api/R/array_contains.html new file mode 100644 index 000..315589f --- /dev/null +++ b/site/docs/2.2.2/api/R/array_contains.html @@ -0,0 +1,66 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: array_contains + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> 
+https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +array_contains {SparkR}R Documentation + +array_contains + +Description + +Returns null if the array is null, true if the array contains the value, and false otherwise. + + + +Usage + + +array_contains(x, value) + +## S4 method for signature 'Column' +array_contains(x, value) + + + +Arguments + + +x + +A Column + +value + +A value to be checked if contained in the column + + + + +Note + +array_contains since 1.6.0 + + + +See Also + +Other collection_funcs: explode, +posexplode, size, +sort_array + + + +Examples + +## Not run: array_contains(df$c, 1) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/as.data.frame.html -- diff --git a/site/docs/2.2.2/api/R/as.data.frame.html b/site/docs/2.2.2/api/R/as.data.frame.html new file mode 100644 index 000..af9cc5f --- /dev/null +++ b/site/docs/2.2.2/api/R/as.data.frame.html @@ -0,0 +1,122 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: Download data from a SparkDataFrame into a R data.frame + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;>
[11/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameStatFunctions.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameStatFunctions.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameStatFunctions.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameStatFunctions.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameWriter.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameWriter.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameWriter.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fsql%2FDataFrameWriter.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +}
[20/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressionModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressionModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressionModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressionModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressor.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressor.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressor.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fregression%2FDecisionTreeRegressor.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch
[41/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/log1p.html -- diff --git a/site/docs/2.2.2/api/R/log1p.html b/site/docs/2.2.2/api/R/log1p.html new file mode 100644 index 000..ec4b4dc --- /dev/null +++ b/site/docs/2.2.2/api/R/log1p.html @@ -0,0 +1,77 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: log1p + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +log1p {SparkR}R Documentation + +log1p + +Description + +Computes the natural logarithm of the given value plus one. + + + +Usage + + +## S4 method for signature 'Column' +log1p(x) + + + +Arguments + + +x + +Column to compute on. + + + + +Note + +log1p since 1.5.0 + + + +See Also + +Other math_funcs: acos, asin, +atan2, atan, +bin, bround, +cbrt, ceil, +conv, corr, +cosh, cos, +covar_pop, cov, +expm1, exp, +factorial, floor, +hex, hypot, +log10, log2, +log, pmod, +rint, round, +shiftLeft, +shiftRightUnsigned, +shiftRight, signum, +sinh, sin, +sqrt, tanh, +tan, toDegrees, +toRadians, unhex + + + +Examples + +## Not run: log1p(df$c) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/log2.html -- diff --git a/site/docs/2.2.2/api/R/log2.html b/site/docs/2.2.2/api/R/log2.html new file mode 100644 index 000..97a48ca --- /dev/null +++ b/site/docs/2.2.2/api/R/log2.html @@ -0,0 +1,77 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: log2 + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> 
+hljs.initHighlightingOnLoad(); + + +log2 {SparkR}R Documentation + +log2 + +Description + +Computes the logarithm of the given column in base 2. + + + +Usage + + +## S4 method for signature 'Column' +log2(x) + + + +Arguments + + +x + +Column to compute on. + + + + +Note + +log2 since 1.5.0 + + + +See Also + +Other math_funcs: acos, asin, +atan2, atan, +bin, bround, +cbrt, ceil, +conv, corr, +cosh, cos, +covar_pop, cov, +expm1, exp, +factorial, floor, +hex, hypot, +log10, log1p, +log, pmod, +rint, round, +shiftLeft, +shiftRightUnsigned, +shiftRight, signum, +sinh, sin, +sqrt, tanh, +tan, toDegrees, +toRadians, unhex + + + +Examples + +## Not run: log2(df$c) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/lower.html -- diff --git a/site/docs/2.2.2/api/R/lower.html b/site/docs/2.2.2/api/R/lower.html new file mode 100644 index 000..88b28bb --- /dev/null +++ b/site/docs/2.2.2/api/R/lower.html @@ -0,0 +1,73 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: lower + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +lower {SparkR}R Documentation + +lower + +Description + +Converts a string column to lower case. + + + +Usage + + +lower(x) + +## S4 method for signature 'Column' +lower(x) + + + +Arguments + + +x + +Column to compute on. 
+ + + + +Note + +lower since 1.4.0 + + + +See Also + +Other string_funcs: ascii, +base64, concat_ws, +concat, decode, +encode, format_number, +format_string, initcap, +instr, length, +levenshtein, locate, +lpad, ltrim, +regexp_extract, +regexp_replace, reverse, +rpad, rtrim, +soundex, substring_index, +translate, trim, +unbase64, upper + + + +Examples + +## Not run: lower(df$c) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/lpad.html -- diff --git a/site/docs/2.2.2/api/R/lpad.html b/site/docs/2.2.2/api/R/lpad.html new file mode 100644 index 000..48f067e --- /dev/null +++ b/site/docs/2.2.2/api/R/lpad.html @@ -0,0 +1,81 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: lpad + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js">
[22/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FStringIndexerModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FStringIndexerModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FStringIndexerModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FStringIndexerModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FTokenizer.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FTokenizer.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FTokenizer.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FTokenizer.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) +
[01/51] [partial] spark-website git commit: Spark 2.2.2 docs
Repository: spark-website Updated Branches: refs/heads/asf-site 5660fb9a4 -> e10014632 http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/InternalAccumulator.output$.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/InternalAccumulator.output$.html b/site/docs/2.2.2/api/java/org/apache/spark/InternalAccumulator.output$.html new file mode 100644 index 000..d5d36f8 --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/InternalAccumulator.output$.html @@ -0,0 +1,325 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +InternalAccumulator.output$ (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":10,"i1":10}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class InternalAccumulator.output$ + + + +Object + + +org.apache.spark.InternalAccumulator.output$ + + + + + + + +Enclosing class: +InternalAccumulator + + + +public static class InternalAccumulator.output$ +extends Object + + + + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static InternalAccumulator.output$ +MODULE$ +Static reference to the singleton instance of this Scala object. 
+ + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +output$() + + + + + + + + + +Method Summary + +All MethodsInstance MethodsConcrete Methods + +Modifier and Type +Method and Description + + +String +BYTES_WRITTEN() + + +String +RECORDS_WRITTEN() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static finalInternalAccumulator.output$ MODULE$ +Static reference to the singleton instance of this Scala object. + + + + + + + + + +Constructor Detail + + + + + +output$ +publicoutput$() + + + + + + + + + +Method Detail + + + + + +BYTES_WRITTEN +publicStringBYTES_WRITTEN() + + + + + + + +RECORDS_WRITTEN +publicStringRECORDS_WRITTEN() + + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[44/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/encode.html -- diff --git a/site/docs/2.2.2/api/R/encode.html b/site/docs/2.2.2/api/R/encode.html new file mode 100644 index 000..ae4448e --- /dev/null +++ b/site/docs/2.2.2/api/R/encode.html @@ -0,0 +1,78 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: encode + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +encode {SparkR}R Documentation + +encode + +Description + +Computes the first argument into a binary from a string using the provided character set +(one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). + + + +Usage + + +encode(x, charset) + +## S4 method for signature 'Column,character' +encode(x, charset) + + + +Arguments + + +x + +Column to compute on. 
+ +charset + +Character set to use + + + + +Note + +encode since 1.6.0 + + + +See Also + +Other string_funcs: ascii, +base64, concat_ws, +concat, decode, +format_number, format_string, +initcap, instr, +length, levenshtein, +locate, lower, +lpad, ltrim, +regexp_extract, +regexp_replace, reverse, +rpad, rtrim, +soundex, substring_index, +translate, trim, +unbase64, upper + + + +Examples + +## Not run: encode(df$c, UTF-8) + + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/endsWith.html -- diff --git a/site/docs/2.2.2/api/R/endsWith.html b/site/docs/2.2.2/api/R/endsWith.html new file mode 100644 index 000..b5cc954 --- /dev/null +++ b/site/docs/2.2.2/api/R/endsWith.html @@ -0,0 +1,56 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: endsWith + + + + +endsWith {SparkR}R Documentation + +endsWith + +Description + +Determines if entries of x end with string (entries of) suffix respectively, +where strings are recycled to common lengths. 
+ + + +Usage + + +endsWith(x, suffix) + +## S4 method for signature 'Column' +endsWith(x, suffix) + + + +Arguments + + +x + +vector of character string whose ends are considered + +suffix + +character vector (often of length one) + + + + +Note + +endsWith since 1.4.0 + + + +See Also + +Other colum_func: alias, +between, cast, +otherwise, over, +startsWith, substr + + +[Package SparkR version 2.2.2 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/R/except.html -- diff --git a/site/docs/2.2.2/api/R/except.html b/site/docs/2.2.2/api/R/except.html new file mode 100644 index 000..212a638 --- /dev/null +++ b/site/docs/2.2.2/api/R/except.html @@ -0,0 +1,113 @@ +http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd;>http://www.w3.org/1999/xhtml;>R: except + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +except {SparkR}R Documentation + +except + +Description + +Return a new SparkDataFrame containing rows in this SparkDataFrame +but not in another SparkDataFrame. This is equivalent to EXCEPT in SQL. + + + +Usage + + +except(x, y) + +## S4 method for signature 'SparkDataFrame,SparkDataFrame' +except(x, y) + + + +Arguments + + +x + +a SparkDataFrame. + +y + +a SparkDataFrame. + + + + +Value + +A SparkDataFrame containing the result of the except operation. 
+ + + +Note + +except since 1.4.0 + + + +See Also + +Other SparkDataFrame functions: SparkDataFrame-class, +agg, arrange, +as.data.frame, +attach,SparkDataFrame-method, +cache, checkpoint, +coalesce, collect, +colnames, coltypes, +createOrReplaceTempView, +crossJoin, dapplyCollect, +dapply, describe, +dim, distinct, +dropDuplicates, dropna, +drop, dtypes, +explain, filter, +first, gapplyCollect, +gapply, getNumPartitions, +group_by, head, +hint, histogram, +insertInto, intersect, +isLocal, isStreaming, +join, limit, +merge, mutate, +ncol, nrow, +persist, printSchema, +randomSplit, rbind, +registerTempTable, rename, +repartition, sample, +saveAsTable, schema, +selectExpr, select, +showDF, show, +storageLevel, str, +subset, take, +toJSON, union, +unpersist, withColumn, +with, write.df, +write.jdbc, write.json, +write.orc, write.parquet, +write.stream, write.text + + + +Examples + +## Not run: +##D sparkR.session() +##D df1 - read.json(path) +##D df2 - read.json(path2) +##D exceptDF - except(df, df2) +## End(Not run) + + + +[Package SparkR version 2.2.2 Index] +
[06/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html b/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html new file mode 100644 index 000..84a169b --- /dev/null +++ b/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html @@ -0,0 +1,379 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +AccumulatorParam.IntAccumulatorParam$ (Spark 2.2.2 JavaDoc) + + + + + + +var methods = {"i0":42,"i1":42}; +var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; +var altColor = "altColor"; +var rowColor = "rowColor"; +var tableTab = "tableTab"; +var activeTableTab = "activeTableTab"; + + +JavaScript is disabled on your browser. + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class AccumulatorParam.IntAccumulatorParam$ + + + +Object + + +org.apache.spark.AccumulatorParam.IntAccumulatorParam$ + + + + + + + +All Implemented Interfaces: +java.io.Serializable, AccumulableParamObject,Object, AccumulatorParamObject + + +Enclosing interface: +AccumulatorParamT + + +Deprecated. +use AccumulatorV2. Since 2.0.0. 
+ + +public static class AccumulatorParam.IntAccumulatorParam$ +extends Object +implements AccumulatorParamObject + +See Also: +Serialized Form + + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interfaceorg.apache.spark.AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$, AccumulatorParam.FloatAccumulatorParam$, AccumulatorParam.IntAccumulatorParam$, AccumulatorParam.LongAccumulatorParam$, AccumulatorParam.StringAccumulatorParam$ + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static AccumulatorParam.IntAccumulatorParam$ +MODULE$ +Deprecated. +Static reference to the singleton instance of this Scala object. + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +IntAccumulatorParam$() +Deprecated. + + + + + + + + + + +Method Summary + +All MethodsInstance MethodsConcrete MethodsDeprecated Methods + +Modifier and Type +Method and Description + + +int +addInPlace(intt1, + intt2) +Deprecated. + + + +int +zero(intinitialValue) +Deprecated. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.AccumulatorParam +addAccumulator + + + + + +Methods inherited from interfaceorg.apache.spark.AccumulableParam +addInPlace, zero + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static finalAccumulatorParam.IntAccumulatorParam$ MODULE$ +Deprecated. +Static reference to the singleton instance of this Scala object. + + + + + + + + + +Constructor Detail + + + + + +IntAccumulatorParam$ +publicIntAccumulatorParam$() +Deprecated. + + + + + + + + + +Method Detail + + + + + +addInPlace +publicintaddInPlace(intt1, + intt2) +Deprecated. + + + + + + + +zero +publicintzero(intinitialValue) +Deprecated. 
+ + + + + + + + + + + + + +Skip navigation links + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +PrevClass +NextClass + + +Frames +NoFrames + + +AllClasses + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html -- diff --git a/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html b/site/docs/2.2.2/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html new file mode 100644 index 000..6a016fa
[23/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FIndexToString.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FIndexToString.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FIndexToString.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FIndexToString.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FInteraction.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FInteraction.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FInteraction.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Ffeature%2FInteraction.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return
[16/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FHashingTF.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FHashingTF.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FHashingTF.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FHashingTF.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FVectorTransformer.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FVectorTransformer.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FVectorTransformer.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Ffeature%2FVectorTransformer.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot)
[27/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FForeachPartitionFunction.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FForeachPartitionFunction.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FForeachPartitionFunction.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FForeachPartitionFunction.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. 
+ +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FFunction.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FFunction.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FFunction.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fapi%2Fjava%2Ffunction%2FFunction.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') +
[21/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleArrayParam.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleArrayParam.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleArrayParam.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleArrayParam.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleParam.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleParam.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleParam.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fml%2Fparam%2FDoubleParam.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false;
[17/51] [partial] spark-website git commit: Spark 2.2.2 docs
http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDA.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDA.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDA.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDA.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. 
If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/e1001463/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDAModel.html -- diff --git a/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDAModel.html b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDAModel.html new file mode 100644 index 000..52511ca --- /dev/null +++ b/site/docs/2.2.2/api/java/index.html?org%2Fapache%2Fspark%2Fmllib%2Fclustering%2FLDAModel.html @@ -0,0 +1,75 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 2.2.2 JavaDoc + +tmpTargetPage = "" + window.location.search; +if (tmpTargetPage != "" && tmpTargetPage != "undefined") +tmpTargetPage = tmpTargetPage.substring(1); +if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage))) +tmpTargetPage = "undefined"; +targetPage = tmpTargetPage; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false;
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v2.2.2 [created] fc28ba3db - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r27902 - /dev/spark/v2.2.2-rc2-bin/ /release/spark/spark-2.2.2/
Author: tgraves Date: Tue Jul 3 18:08:54 2018 New Revision: 27902 Log: Publish spark 2.2.2 Added: release/spark/spark-2.2.2/ - copied from r27901, dev/spark/v2.2.2-rc2-bin/ Removed: dev/spark/v2.2.2-rc2-bin/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r27774 - /dev/spark/KEYS
Author: tgraves Date: Wed Jun 27 20:22:34 2018 New Revision: 27774 Log: Update KEYS Modified: dev/spark/KEYS Modified: dev/spark/KEYS == --- dev/spark/KEYS (original) +++ dev/spark/KEYS Wed Jun 27 20:22:34 2018 @@ -589,3 +589,60 @@ pp4VlK8tOgCZ3o6bWPR95GvVBvALnpoOJjS+cGwO 7fva6arUhIPxFUb+7G0SqJkpib8s4IZ+os3wh4Rv5ZfDAavB4ftC298= =DFmT -END PGP PUBLIC KEY BLOCK- + +pub 4096R/81E6C76A 2018-06-13 +uid Thomas Graves (CODE SIGNING KEY) +sub 4096R/FC8689C4 2018-06-13 + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v2.0.22 (GNU/Linux) + +mQINBFsgdxABEACwfMYBgIW9v73TIIQJnpHWE7SgGHfAs60FR128uLzzcAJcAly6 +/lq4nRU6inNqiATM3f/HXKhKsv1koFtqPXz3yT6SYTgbMLP0zCU41nPi00r0AOY0 +KrRmM76bbx1hYVjw3L0gTz2froeQxcYoTUMPNqGFjGfEYHfWo04O2Ev5nf+P5kEU +btk+QhzetcmIKRHVg1DSivPSknGYFRjVREWJTsfz8dZ/2MkN3gqPDcrIIgHJrGpM +cAESFz1E7RLX3i+cJGH90BCDXv2DgcRtVAHJ/9b9IbfZ24pWqwk7nt7tpcGMwDPy +9caQ2H5q4H+ECd2Rj2Tzh+ptRz6csLWUAONlS/+rP208wVTNqNCONlw5euYh8wJe +DVrahHUlsI/SB5edBKQY6/nW+54wphHdbF1iE7GV0XFyG4t0RARkvJlaO+5SMngw +3nxMd/CoL8r0j+0oncUkb8ZAPzJaVs0YHtEKOf/k7PpaRZW5LYCxiZzZWcjyTxtT +kqrPZ11MwEqLVFiEMnEg4JVCX14WqP9AdWnrw1pOf9iAgs7LwyOEWxaR9CN70CLK +Qao56vILC5/B9FN4jyee5y5U8yujqIXjU1ptKnMmYDEZn/fvns0kOix//lWSdHg5 +yt6/95Q/QXxOo4tenlKvLfORVkY4RNmWUOErZ+hbu0cysS8YToOif4+F1QARAQAB +tDVUaG9tYXMgR3JhdmVzIChDT0RFIFNJR05JTkcgS0VZKSA8dGdyYXZlc0BhcGFj +aGUub3JnPokCNwQTAQoAIQUCWyB3EAIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIX +gAAKCRDeT7zNgebHam5pEACHgEA+xEsL9TYKj7lk5pwa7tVSFRIjiZqu7fausssS +sAnCDyHv683HKLh8b+tFONeZlTIz1ygeEnbzYr2uthF8GPId1ydNFrZ3/tV/8XlN +hb2AwThuDXvMHpw1GvTqbMF80kdJ5SIUIC8l6M05NyInU9kTF3mKjol2UXIpBono +rcSeozCn6k6NLYAopmwJMGtMc4vf1SrCn59+Y2QUBrUsw8iA1yAwc2IYdteYbC1k +4lrLPIMvGzKC8wAb2I46B3EWkR4soQ8VYfdkZKjJqdcYUSJSL9Jw2XgRuQxKrSoB +6tc52eR0V/rPCJCTkqFB5eLIYwn4A1oD3PZllSaBlG6UxWlxZiyoAf6EzgIbORTg +ark5NA6r9p+is7U684ysLBa1be/RX4nq7OIxdOomARErVbepDsJa8TkTyGs+CTau +AWIi9UbYn9DZ08VMe0PPqT+7s2kcjuddIP2WX6ScYt0c3HAAr9Ky3FwOGvkMlWo2 +DETmQwt1RNhEZC0kSvJSveqHmMaaY4DcgcfPYqUpYbLKDiIzxoKkAOYeyt7X85qJ 
+xpUC5nbA1G9Vp1o+zaqzOqAOLTLsI370kVj+57XoFiwIo9Fl0VlSxLoCzXJ7Rjkt +iCu2Wwxd22W4f8OtRVXJQsL1ikAa5hsE0zkpHHypa8Wa41NqVM2H1nPjOLmcRQky +xLkCDQRbIHcQARAA5C1Z49Fs0PsKQNjEFyMxvJGCAdGhQmH4VGqZxJQ5GRmScucG +gPkeW2L3n3v6oguASihB8KgF+WgLgM/8Tn5u7BacY+7hcqgSDgUglVAqcGGrsNTh +PTdO92HFcIdKfAoacF8IPRDwhCBABVUQyfhVS4yzTHbk1JbX80mcGOkaZMU1tnrF +etfxcxEQI+1DP/JZ70A6EK+FlyGgmtjrjPRV38DJWDYV4NxHEXSO0d669dIroHBE +LzcbarB4k9tkSnO2hmbulX2622vKKMuxDmIbKgid69AKVWD0qFaCNSYrdOcexbQr ++LNW6gS30sHhauUIvAu1pGWJefwJjvSme39uBbhG9dpP9tjeUH62RejXvqu0lb4o +t8tRbWcOZXEZhQQy/SVJXwpMevGAKO8euOuIY9m3uNzf0wMKD7y/023odDhk4qb7 +OIjJwq1NJqkIRJLaY5MBeyAd7xwkfi0/EhrSkaoISJwbrrfHaBCgEJVBWXF+pRCB +Tp6FotqL45AH/4j+/cw/k94kqc433u7P1ORCWWwgVxQN69X+8hZBo5BSVnW+TSvY +MATFM/7uMvj2yk0DHPbK5jZ2sqpe5ejQTmougya3mj8kEr/vyNExM8ebo+KdSEuZ +hnbBjRhCsQ/SgwIpY53Lbp4gZAPjN66VX0kbuJ0Nocvp2dMl+9QkVq+9im8AEQEA +AYkCHwQYAQoACQUCWyB3EAIbDAAKCRDeT7zNgebHavfsEACp7nEkv7fYU9QgnGpP +/14yICPtnfBJzJ5Z35WkjSStBxd+Xdbnec1UlLqzGPPuXtLOcouRvmN6WXGPynB9 +ERd2Pga1yiexDZhVVR/vucwNq5pQv6LTlufMI1Nan/p6swwlsDWpVzPfGRaOMOt1 +1OaWeTA0q2ByDWQQV1qay09fs9dJsW6YBzFCKwNNaoXhM5zrmlQj80Ces7gMBJd+ +iivRVceRC+b2vAMvceX2FReS21o4S+mTKlTCvUNDbJe46a+rUEeHzBYN5qvbLedO +PQEXXUJLrjbj8/uA5gfW+Ib2azfpo7ezPnxYBNz4UC9CGF3JWFnYX7UHpwniB2pt +J9RrNiKL9Ohdpg1qNgJCAXzbrE31bqpaGawTFYMtRlEYHKDTs6jglX9cIVqDiz+y +smiMjVBM4G8PVwCvC0OfuCOqQrbieiSNfdBlcWJG5MbZqyXwQNiuRl5tdJPtB4Kx +NfSm3khyy1/xze0gOi4OwjznHXwW7606Uu6eAKKhWcQ7mHovEJSp5ziWb+/1PXe7 +f5XpIqrYxuMpghlA6Q72sON9zcmFEKW6y+3mRKtBzOsJXLKS0AWXi19zoFlq3Kzs +UiZuJIMHLPJK4sbOj5nMZE2163zUXz+gOVMeLqVxV/zlW6xnYu84taF4PbYIzmts +qG1EJoF+tteqemi1ZwYipD06wA== +=cA5d +-END PGP PUBLIC KEY BLOCK- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r27773 - /release/spark/KEYS
Author: tgraves Date: Wed Jun 27 20:04:43 2018 New Revision: 27773 Log: Update KEYS Modified: release/spark/KEYS Modified: release/spark/KEYS == --- release/spark/KEYS (original) +++ release/spark/KEYS Wed Jun 27 20:04:43 2018 @@ -589,3 +589,60 @@ pp4VlK8tOgCZ3o6bWPR95GvVBvALnpoOJjS+cGwO 7fva6arUhIPxFUb+7G0SqJkpib8s4IZ+os3wh4Rv5ZfDAavB4ftC298= =DFmT -END PGP PUBLIC KEY BLOCK- + +pub 4096R/81E6C76A 2018-06-13 +uid Thomas Graves (CODE SIGNING KEY) +sub 4096R/FC8689C4 2018-06-13 + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v2.0.22 (GNU/Linux) + +mQINBFsgdxABEACwfMYBgIW9v73TIIQJnpHWE7SgGHfAs60FR128uLzzcAJcAly6 +/lq4nRU6inNqiATM3f/HXKhKsv1koFtqPXz3yT6SYTgbMLP0zCU41nPi00r0AOY0 +KrRmM76bbx1hYVjw3L0gTz2froeQxcYoTUMPNqGFjGfEYHfWo04O2Ev5nf+P5kEU +btk+QhzetcmIKRHVg1DSivPSknGYFRjVREWJTsfz8dZ/2MkN3gqPDcrIIgHJrGpM +cAESFz1E7RLX3i+cJGH90BCDXv2DgcRtVAHJ/9b9IbfZ24pWqwk7nt7tpcGMwDPy +9caQ2H5q4H+ECd2Rj2Tzh+ptRz6csLWUAONlS/+rP208wVTNqNCONlw5euYh8wJe +DVrahHUlsI/SB5edBKQY6/nW+54wphHdbF1iE7GV0XFyG4t0RARkvJlaO+5SMngw +3nxMd/CoL8r0j+0oncUkb8ZAPzJaVs0YHtEKOf/k7PpaRZW5LYCxiZzZWcjyTxtT +kqrPZ11MwEqLVFiEMnEg4JVCX14WqP9AdWnrw1pOf9iAgs7LwyOEWxaR9CN70CLK +Qao56vILC5/B9FN4jyee5y5U8yujqIXjU1ptKnMmYDEZn/fvns0kOix//lWSdHg5 +yt6/95Q/QXxOo4tenlKvLfORVkY4RNmWUOErZ+hbu0cysS8YToOif4+F1QARAQAB +tDVUaG9tYXMgR3JhdmVzIChDT0RFIFNJR05JTkcgS0VZKSA8dGdyYXZlc0BhcGFj +aGUub3JnPokCNwQTAQoAIQUCWyB3EAIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIX +gAAKCRDeT7zNgebHam5pEACHgEA+xEsL9TYKj7lk5pwa7tVSFRIjiZqu7fausssS +sAnCDyHv683HKLh8b+tFONeZlTIz1ygeEnbzYr2uthF8GPId1ydNFrZ3/tV/8XlN +hb2AwThuDXvMHpw1GvTqbMF80kdJ5SIUIC8l6M05NyInU9kTF3mKjol2UXIpBono +rcSeozCn6k6NLYAopmwJMGtMc4vf1SrCn59+Y2QUBrUsw8iA1yAwc2IYdteYbC1k +4lrLPIMvGzKC8wAb2I46B3EWkR4soQ8VYfdkZKjJqdcYUSJSL9Jw2XgRuQxKrSoB +6tc52eR0V/rPCJCTkqFB5eLIYwn4A1oD3PZllSaBlG6UxWlxZiyoAf6EzgIbORTg +ark5NA6r9p+is7U684ysLBa1be/RX4nq7OIxdOomARErVbepDsJa8TkTyGs+CTau +AWIi9UbYn9DZ08VMe0PPqT+7s2kcjuddIP2WX6ScYt0c3HAAr9Ky3FwOGvkMlWo2 
+DETmQwt1RNhEZC0kSvJSveqHmMaaY4DcgcfPYqUpYbLKDiIzxoKkAOYeyt7X85qJ +xpUC5nbA1G9Vp1o+zaqzOqAOLTLsI370kVj+57XoFiwIo9Fl0VlSxLoCzXJ7Rjkt +iCu2Wwxd22W4f8OtRVXJQsL1ikAa5hsE0zkpHHypa8Wa41NqVM2H1nPjOLmcRQky +xLkCDQRbIHcQARAA5C1Z49Fs0PsKQNjEFyMxvJGCAdGhQmH4VGqZxJQ5GRmScucG +gPkeW2L3n3v6oguASihB8KgF+WgLgM/8Tn5u7BacY+7hcqgSDgUglVAqcGGrsNTh +PTdO92HFcIdKfAoacF8IPRDwhCBABVUQyfhVS4yzTHbk1JbX80mcGOkaZMU1tnrF +etfxcxEQI+1DP/JZ70A6EK+FlyGgmtjrjPRV38DJWDYV4NxHEXSO0d669dIroHBE +LzcbarB4k9tkSnO2hmbulX2622vKKMuxDmIbKgid69AKVWD0qFaCNSYrdOcexbQr ++LNW6gS30sHhauUIvAu1pGWJefwJjvSme39uBbhG9dpP9tjeUH62RejXvqu0lb4o +t8tRbWcOZXEZhQQy/SVJXwpMevGAKO8euOuIY9m3uNzf0wMKD7y/023odDhk4qb7 +OIjJwq1NJqkIRJLaY5MBeyAd7xwkfi0/EhrSkaoISJwbrrfHaBCgEJVBWXF+pRCB +Tp6FotqL45AH/4j+/cw/k94kqc433u7P1ORCWWwgVxQN69X+8hZBo5BSVnW+TSvY +MATFM/7uMvj2yk0DHPbK5jZ2sqpe5ejQTmougya3mj8kEr/vyNExM8ebo+KdSEuZ +hnbBjRhCsQ/SgwIpY53Lbp4gZAPjN66VX0kbuJ0Nocvp2dMl+9QkVq+9im8AEQEA +AYkCHwQYAQoACQUCWyB3EAIbDAAKCRDeT7zNgebHavfsEACp7nEkv7fYU9QgnGpP +/14yICPtnfBJzJ5Z35WkjSStBxd+Xdbnec1UlLqzGPPuXtLOcouRvmN6WXGPynB9 +ERd2Pga1yiexDZhVVR/vucwNq5pQv6LTlufMI1Nan/p6swwlsDWpVzPfGRaOMOt1 +1OaWeTA0q2ByDWQQV1qay09fs9dJsW6YBzFCKwNNaoXhM5zrmlQj80Ces7gMBJd+ +iivRVceRC+b2vAMvceX2FReS21o4S+mTKlTCvUNDbJe46a+rUEeHzBYN5qvbLedO +PQEXXUJLrjbj8/uA5gfW+Ib2azfpo7ezPnxYBNz4UC9CGF3JWFnYX7UHpwniB2pt +J9RrNiKL9Ohdpg1qNgJCAXzbrE31bqpaGawTFYMtRlEYHKDTs6jglX9cIVqDiz+y +smiMjVBM4G8PVwCvC0OfuCOqQrbieiSNfdBlcWJG5MbZqyXwQNiuRl5tdJPtB4Kx +NfSm3khyy1/xze0gOi4OwjznHXwW7606Uu6eAKKhWcQ7mHovEJSp5ziWb+/1PXe7 +f5XpIqrYxuMpghlA6Q72sON9zcmFEKW6y+3mRKtBzOsJXLKS0AWXi19zoFlq3Kzs +UiZuJIMHLPJK4sbOj5nMZE2163zUXz+gOVMeLqVxV/zlW6xnYu84taF4PbYIzmts +qG1EJoF+tteqemi1ZwYipD06wA== +=cA5d +-END PGP PUBLIC KEY BLOCK- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r27769 - in /dev/spark/v2.2.2-rc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/spark
Author: tgraves Date: Wed Jun 27 17:44:22 2018 New Revision: 27769 Log: Apache Spark v2.2.2-rc2 docs [This commit notification would consist of 1346 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r27768 - /dev/spark/v2.2.2-rc2-bin/
Author: tgraves Date: Wed Jun 27 17:16:26 2018 New Revision: 27768 Log: Apache Spark v2.2.2-rc2 Added: dev/spark/v2.2.2-rc2-bin/ dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz (with props) dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.asc dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.sha512 dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz (with props) dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.asc dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.sha512 dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-hadoop2.6.tgz (with props) dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-hadoop2.6.tgz.asc dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-hadoop2.6.tgz.sha512 dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-hadoop2.7.tgz (with props) dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-hadoop2.7.tgz.asc dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-hadoop2.7.tgz.sha512 dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-without-hadoop.tgz (with props) dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-without-hadoop.tgz.asc dev/spark/v2.2.2-rc2-bin/spark-2.2.2-bin-without-hadoop.tgz.sha512 dev/spark/v2.2.2-rc2-bin/spark-2.2.2.tgz (with props) dev/spark/v2.2.2-rc2-bin/spark-2.2.2.tgz.asc dev/spark/v2.2.2-rc2-bin/spark-2.2.2.tgz.sha512 Added: dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.asc == --- dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.asc (added) +++ dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.asc Wed Jun 27 17:16:26 2018 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJbM576AAoJEN5PvM2B5sdqBcQP/390/0Cyo/E2UYUs361AkaR2 +jCI+w0/+msaX4wd0TExllr2BG2Cwz2XHxL0nIOtd3h8MKAw/iBwO2eVd2imHa27i +iZkFXrEBT8Gsompq6Nl/j4Cht/I5ay5qEaG4QHSTnJbLgtQIVVw5Rovw/KLVVm0/ +9r+YYFjahsFTw3JTp62LFUXbF81aTgJWwSw05b5TvAwfws4TZoZmES8yz/YKrRWe +UBZFShAvehVEkSqP7qYm8Dd/fQnfyxaTTUfWXcJQX7fHNrQGXdPOECnlC+TMrmn2 +AqQaKTjHU3p8Wvv78tVLmal9By0NWcPqiSrQ2TlJFEygKseqrDNTw47p06xZHuuz +bYmJu5k7JcQJyG7ThpQgnIFga0TjM4Ymm+9pN5OKsPyNIGhFmWHVp/zAkdvMHVUg +D+5KIDJZPtOK3iL8hAjvgCM7W+Q0GcSOCtigNc7S3CaP+0xcI7RfOAy/Ns4wHPwD +hJZA6u8lLntVwYfmamW4SCB6WHDvdHXl8H2QQ+mVqRpGdnK2fYBUpvcs0dijUAs9 +ZWIvDPuvCE+SnW3zShY0CGxwAhYFEO6vXVExQWsdInTlG7qXDtJ2S+9shm8PohYy +CdDijjBW5sVR9OnLEFoMtxiOGXaGNo04kK4U8lzkq5reFtQc8I9fr+TUSvki9WqM +Mf1RUFRr959InxcvafD8 +=WMWJ +-END PGP SIGNATURE- Added: dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.sha512 == --- dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.sha512 (added) +++ dev/spark/v2.2.2-rc2-bin/SparkR_2.2.2.tar.gz.sha512 Wed Jun 27 17:16:26 2018 @@ -0,0 +1,3 @@ +SparkR_2.2.2.tar.gz: FD8AB48E 7379B62C 85744DD8 D9CC0C85 C76EA843 460C3FEC + A65DBA30 A47B427C A388A8E2 99CF593A 0CE70FA6 F66BA6BF + 2280DF30 F3C4FA63 E451C4BD D49D124D Added: dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.asc == --- dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.asc (added) +++ dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.asc Wed Jun 27 17:16:26 2018 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJbM6QPAAoJEN5PvM2B5sdqnhsP/0pwzvbnrycc6EgAr0/Qnzeu +IDc56d+NIOFLX3KcqF+ggUpll5tj6+Llq8Qs+JNz4ufBT5FNd6/0sot9PfU86AzN +A5+mlLAyOUTh1WSxY54Mo3Mc8nrLKQHw7QQFMxCfay9oTBrlURhyNp5bEwDfOurf +LBlX36sTv3H60N9/RZQWt2OF9YnGHntLIe2Fsqk7GtnUMmkdAqdGGtq1w4cjVtci +5/8xRqRwhVoaGcLLWzsdYMuOqnhBXq6GKtCgF4PLy25vAPVpVQmwcwrx2GVSY6Rr +M5RNGw8cq2AdPHGojbygUtwg7h2N8BFqwM5EgYKIaKyWtw0N7RKRJ4YWl7QJorAe +Q4xfFGCwmzmwqMpl3/ZontQMw56FedWLVuETmBoE30/lqPZKsJCU9eS0/8pdBo4t +7UpQgao25kaBqy4RS2A48343oY9y4vreUQxxjxilV/RxCYI+yNalU3+oreHpaDAk +k9SGprdmt5zXAkEd4wNmrLWHVB/IlyzjH8s7gKBlHA9GLpxnFwlMInZJjZkN7Jxt +6A3SDOzhuuEkPHjb5rT1DyCFv7w5rN6TZ0iFxAh9sgR5MbMBaQFRr8krPiYVcG/2 +k9d8FMrDgAfttRVMe6fOg721bnAZaDwdCcV07DMn5/lR60bjhhHMrsD7JWy89RI9 +qrS3ckY1stERB3toI/Li +=yKed +-END PGP SIGNATURE- Added: dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.sha512 == --- dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.sha512 (added) +++ dev/spark/v2.2.2-rc2-bin/pyspark-2.2.2.tar.gz.sha512
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v2.2.2-rc2 [created] fc28ba3db - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: Preparing development version 2.2.3-SNAPSHOT
Preparing development version 2.2.3-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47958270 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47958270 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47958270 Branch: refs/heads/branch-2.2 Commit: 47958270f51d08a076e20e299bb2d86fa1927d26 Parents: fc28ba3 Author: Thomas Graves Authored: Wed Jun 27 13:55:20 2018 + Committer: Thomas Graves Committed: Wed Jun 27 13:55:20 2018 + -- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 39 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/47958270/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 380b3ef..ad72330 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR 
Type: Package -Version: 2.2.2 +Version: 2.2.3 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/47958270/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 775d9f8..f9ec6e7 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.2.2 +2.2.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/47958270/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 017f0f1..55d29d5 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.2 +2.2.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/47958270/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 17d5216..6d84766 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.2 +2.2.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/47958270/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 8d03c86..6228be6 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.2 +2.2.3-SNAPSHOT ../../pom.xml
[1/2] spark git commit: Preparing Spark release v2.2.2-rc2
Repository: spark Updated Branches: refs/heads/branch-2.2 72575d0bb -> 47958270f Preparing Spark release v2.2.2-rc2 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc28ba3d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc28ba3d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc28ba3d Branch: refs/heads/branch-2.2 Commit: fc28ba3db7185e84b6dbd02ad8ef8f1d06b9e3c6 Parents: 72575d0 Author: Thomas Graves Authored: Wed Jun 27 13:55:11 2018 + Committer: Thomas Graves Committed: Wed Jun 27 13:55:11 2018 + -- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 39 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc28ba3d/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ad72330..380b3ef 100644 --- 
a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.3 +Version: 2.2.2 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/fc28ba3d/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index f9ec6e7..775d9f8 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.2.3-SNAPSHOT +2.2.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc28ba3d/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 55d29d5..017f0f1 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.3-SNAPSHOT +2.2.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc28ba3d/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 6d84766..17d5216 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.3-SNAPSHOT +2.2.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/fc28ba3d/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 6228be6..8d03c86 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.3-SNAPSHOT +2.2.2 ../../pom.xml