[3/3] spark-website git commit: Add 1.6.3 release.
Add 1.6.3 release. Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/b9aa4c3e Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/b9aa4c3e Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/b9aa4c3e Branch: refs/heads/asf-site Commit: b9aa4c3eefe4788fa97086ea87d92d8e3bfbc535 Parents: 24d32b7 Author: Reynold XinAuthored: Mon Nov 7 19:05:12 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 19:05:12 2016 -0800 -- documentation.md| 1 + js/downloads.js | 3 +- news/_posts/2016-11-07-spark-1-6-3-released.md | 16 ++ .../_posts/2016-11-07-spark-release-1-6-3.md| 18 ++ site/community.html | 6 +- site/documentation.html | 12 +- site/downloads.html | 6 +- site/examples.html | 6 +- site/faq.html | 6 +- site/graphx/index.html | 6 +- site/index.html | 6 +- site/js/downloads.js| 3 +- site/mailing-lists.html | 6 +- site/mllib/index.html | 6 +- site/news/amp-camp-2013-registration-ope.html | 6 +- .../news/announcing-the-first-spark-summit.html | 6 +- .../news/fourth-spark-screencast-published.html | 6 +- site/news/index.html| 30 ++- site/news/nsdi-paper.html | 6 +- site/news/one-month-to-spark-summit-2015.html | 6 +- .../proposals-open-for-spark-summit-east.html | 6 +- ...registration-open-for-spark-summit-east.html | 6 +- .../news/run-spark-and-shark-on-amazon-emr.html | 6 +- site/news/spark-0-6-1-and-0-5-2-released.html | 6 +- site/news/spark-0-6-2-released.html | 6 +- site/news/spark-0-7-0-released.html | 6 +- site/news/spark-0-7-2-released.html | 6 +- site/news/spark-0-7-3-released.html | 6 +- site/news/spark-0-8-0-released.html | 6 +- site/news/spark-0-8-1-released.html | 6 +- site/news/spark-0-9-0-released.html | 6 +- site/news/spark-0-9-1-released.html | 8 +- site/news/spark-0-9-2-released.html | 8 +- site/news/spark-1-0-0-released.html | 6 +- site/news/spark-1-0-1-released.html | 6 +- site/news/spark-1-0-2-released.html | 6 +- site/news/spark-1-1-0-released.html 
| 8 +- site/news/spark-1-1-1-released.html | 6 +- site/news/spark-1-2-0-released.html | 6 +- site/news/spark-1-2-1-released.html | 6 +- site/news/spark-1-2-2-released.html | 8 +- site/news/spark-1-3-0-released.html | 6 +- site/news/spark-1-4-0-released.html | 6 +- site/news/spark-1-4-1-released.html | 6 +- site/news/spark-1-5-0-released.html | 6 +- site/news/spark-1-5-1-released.html | 6 +- site/news/spark-1-5-2-released.html | 6 +- site/news/spark-1-6-0-released.html | 6 +- site/news/spark-1-6-1-released.html | 6 +- site/news/spark-1-6-2-released.html | 6 +- site/news/spark-1-6-3-released.html | 213 ++ site/news/spark-2-0-0-released.html | 6 +- site/news/spark-2-0-1-released.html | 6 +- site/news/spark-2.0.0-preview.html | 6 +- .../spark-accepted-into-apache-incubator.html | 6 +- site/news/spark-and-shark-in-the-news.html | 8 +- site/news/spark-becomes-tlp.html| 6 +- site/news/spark-featured-in-wired.html | 6 +- .../spark-mailing-lists-moving-to-apache.html | 6 +- site/news/spark-meetups.html| 6 +- site/news/spark-screencasts-published.html | 6 +- site/news/spark-summit-2013-is-a-wrap.html | 6 +- site/news/spark-summit-2014-videos-posted.html | 6 +- site/news/spark-summit-2015-videos-posted.html | 6 +- site/news/spark-summit-agenda-posted.html | 6 +- .../spark-summit-east-2015-videos-posted.html | 8 +- .../spark-summit-east-2016-cfp-closing.html | 6 +- site/news/spark-summit-east-agenda-posted.html | 6 +- .../news/spark-summit-europe-agenda-posted.html | 6 +- site/news/spark-summit-europe.html | 6 +- .../spark-summit-june-2016-agenda-posted.html | 6 +- site/news/spark-tips-from-quantifind.html | 6 +- .../spark-user-survey-and-powered-by-page.html | 6 +- site/news/spark-version-0-6-0-released.html | 6 +-
[2/3] spark-website git commit: Add 1.6.3 release.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/b9aa4c3e/site/news/spark-2-0-0-released.html -- diff --git a/site/news/spark-2-0-0-released.html b/site/news/spark-2-0-0-released.html index dd2f3e8..070d5ac 100644 --- a/site/news/spark-2-0-0-released.html +++ b/site/news/spark-2-0-0-released.html @@ -150,6 +150,9 @@ Latest News + Spark 1.6.3 released + (Nov 07, 2016) + Spark 2.0.1 released (Oct 03, 2016) @@ -159,9 +162,6 @@ Spark 1.6.2 released (Jun 25, 2016) - Call for Presentations for Spark Summit EU is Open - (Jun 16, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/b9aa4c3e/site/news/spark-2-0-1-released.html -- diff --git a/site/news/spark-2-0-1-released.html b/site/news/spark-2-0-1-released.html index 8ee951f..f772398 100644 --- a/site/news/spark-2-0-1-released.html +++ b/site/news/spark-2-0-1-released.html @@ -150,6 +150,9 @@ Latest News + Spark 1.6.3 released + (Nov 07, 2016) + Spark 2.0.1 released (Oct 03, 2016) @@ -159,9 +162,6 @@ Spark 1.6.2 released (Jun 25, 2016) - Call for Presentations for Spark Summit EU is Open - (Jun 16, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/b9aa4c3e/site/news/spark-2.0.0-preview.html -- diff --git a/site/news/spark-2.0.0-preview.html b/site/news/spark-2.0.0-preview.html index 87d446a..7e7f1a8 100644 --- a/site/news/spark-2.0.0-preview.html +++ b/site/news/spark-2.0.0-preview.html @@ -150,6 +150,9 @@ Latest News + Spark 1.6.3 released + (Nov 07, 2016) + Spark 2.0.1 released (Oct 03, 2016) @@ -159,9 +162,6 @@ Spark 1.6.2 released (Jun 25, 2016) - Call for Presentations for Spark Summit EU is Open - (Jun 16, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/b9aa4c3e/site/news/spark-accepted-into-apache-incubator.html -- diff --git a/site/news/spark-accepted-into-apache-incubator.html b/site/news/spark-accepted-into-apache-incubator.html index bb6ed2e..e6330cf 100644 --- a/site/news/spark-accepted-into-apache-incubator.html +++ 
b/site/news/spark-accepted-into-apache-incubator.html @@ -150,6 +150,9 @@ Latest News + Spark 1.6.3 released + (Nov 07, 2016) + Spark 2.0.1 released (Oct 03, 2016) @@ -159,9 +162,6 @@ Spark 1.6.2 released (Jun 25, 2016) - Call for Presentations for Spark Summit EU is Open - (Jun 16, 2016) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/b9aa4c3e/site/news/spark-and-shark-in-the-news.html -- diff --git a/site/news/spark-and-shark-in-the-news.html b/site/news/spark-and-shark-in-the-news.html index 16375a5..d48af18 100644 --- a/site/news/spark-and-shark-in-the-news.html +++ b/site/news/spark-and-shark-in-the-news.html @@ -150,6 +150,9 @@ Latest News + Spark 1.6.3 released + (Nov 07, 2016) + Spark 2.0.1 released (Oct 03, 2016) @@ -159,9 +162,6 @@ Spark 1.6.2 released (Jun 25, 2016) - Call for Presentations for Spark Summit EU is Open - (Jun 16, 2016) - Archive @@ -196,7 +196,7 @@ http://data-informed.com/spark-an-open-source-engine-for-iterative-data-mining/;>DataInformed interviewed two Spark users and wrote about their applications in anomaly detection, predictive analytics and data mining. -In other news, there will be a full day of tutorials on Spark and Shark at the http://strataconf.com/strata2013;>OReilly Strata conference in February. They include a three-hour http://strataconf.com/strata2013/public/schedule/detail/27438;>introduction to Spark, Shark and BDAS Tuesday morning, and a three-hour http://strataconf.com/strata2013/public/schedule/detail/27440;>hands-on exercise session. +In other news, there will be a full day of tutorials on Spark and Shark at the http://strataconf.com/strata2013;>OReilly Strata conference in February. They include a three-hour http://strataconf.com/strata2013/public/schedule/detail/27438;>introduction to Spark, Shark and BDAS Tuesday
spark git commit: [SPARK-16575][CORE] partition calculation mismatch with sc.binaryFiles
Repository: spark Updated Branches: refs/heads/branch-2.1 4cb4e5ff0 -> c8879bf1e [SPARK-16575][CORE] partition calculation mismatch with sc.binaryFiles ## What changes were proposed in this pull request? This Pull request comprises of the critical bug SPARK-16575 changes. This change rectifies the issue with BinaryFileRDD partition calculations as upon creating an RDD with sc.binaryFiles, the resulting RDD always just consisted of two partitions only. ## How was this patch tested? The original issue ie. getNumPartitions on binary Files RDD (always having two partitions) was first replicated and then tested upon the changes. Also the unit tests have been checked and passed. This contribution is my original work and I licence the work to the project under the project's open source license srowen hvanhovell rxin vanzin skyluc kmader zsxwing datafarmer Please have a look . Author: fidato <fidato.jul...@gmail.com> Closes #15327 from fidato13/SPARK-16575. (cherry picked from commit 6f3697136aa68dc39d3ce42f43a7af554d2a3bf9) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8879bf1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8879bf1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8879bf1 Branch: refs/heads/branch-2.1 Commit: c8879bf1ee2af9ccd5d5656571d931d2fc1da024 Parents: 4cb4e5f Author: fidato <fidato.jul...@gmail.com> Authored: Mon Nov 7 18:41:17 2016 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Mon Nov 7 18:41:29 2016 -0800 -- .../org/apache/spark/input/PortableDataStream.scala | 14 +++--- .../org/apache/spark/internal/config/package.scala | 13 + .../scala/org/apache/spark/rdd/BinaryFileRDD.scala | 4 ++-- docs/configuration.md | 16 4 files changed, 42 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8879bf1/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala -- 
diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala index f66510b..59404e0 100644 --- a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala +++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala @@ -27,6 +27,9 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit} +import org.apache.spark.internal.config +import org.apache.spark.SparkContext + /** * A general format for reading whole files in as streams, byte arrays, * or other functions to be added @@ -40,9 +43,14 @@ private[spark] abstract class StreamFileInputFormat[T] * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API * which is set through setMaxSplitSize */ - def setMinPartitions(context: JobContext, minPartitions: Int) { -val totalLen = listStatus(context).asScala.filterNot(_.isDirectory).map(_.getLen).sum -val maxSplitSize = math.ceil(totalLen / math.max(minPartitions, 1.0)).toLong + def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions: Int) { +val defaultMaxSplitBytes = sc.getConf.get(config.FILES_MAX_PARTITION_BYTES) +val openCostInBytes = sc.getConf.get(config.FILES_OPEN_COST_IN_BYTES) +val defaultParallelism = sc.defaultParallelism +val files = listStatus(context).asScala +val totalBytes = files.filterNot(_.isDirectory).map(_.getLen + openCostInBytes).sum +val bytesPerCore = totalBytes / defaultParallelism +val maxSplitSize = Math.min(defaultMaxSplitBytes, Math.max(openCostInBytes, bytesPerCore)) super.setMaxSplitSize(maxSplitSize) } http://git-wip-us.apache.org/repos/asf/spark/blob/c8879bf1/core/src/main/scala/org/apache/spark/internal/config/package.scala -- diff --git 
a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 497ca92..4a3e3d5 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -206,4 +206,17 @@ package object config { "encountering corrupt files and contents that have been read will still be returned.") .booleanConf .createWithDefault(false) + + private[spark] val FILES_MAX_PARTITION_BYTES = ConfigB
spark git commit: [SPARK-16575][CORE] partition calculation mismatch with sc.binaryFiles
Repository: spark Updated Branches: refs/heads/master 1da64e1fa -> 6f3697136 [SPARK-16575][CORE] partition calculation mismatch with sc.binaryFiles ## What changes were proposed in this pull request? This Pull request comprises of the critical bug SPARK-16575 changes. This change rectifies the issue with BinaryFileRDD partition calculations as upon creating an RDD with sc.binaryFiles, the resulting RDD always just consisted of two partitions only. ## How was this patch tested? The original issue ie. getNumPartitions on binary Files RDD (always having two partitions) was first replicated and then tested upon the changes. Also the unit tests have been checked and passed. This contribution is my original work and I licence the work to the project under the project's open source license srowen hvanhovell rxin vanzin skyluc kmader zsxwing datafarmer Please have a look . Author: fidato <fidato.jul...@gmail.com> Closes #15327 from fidato13/SPARK-16575. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f369713 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f369713 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f369713 Branch: refs/heads/master Commit: 6f3697136aa68dc39d3ce42f43a7af554d2a3bf9 Parents: 1da64e1 Author: fidato <fidato.jul...@gmail.com> Authored: Mon Nov 7 18:41:17 2016 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Mon Nov 7 18:41:17 2016 -0800 -- .../org/apache/spark/input/PortableDataStream.scala | 14 +++--- .../org/apache/spark/internal/config/package.scala | 13 + .../scala/org/apache/spark/rdd/BinaryFileRDD.scala | 4 ++-- docs/configuration.md | 16 4 files changed, 42 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6f369713/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala -- diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala 
b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala index f66510b..59404e0 100644 --- a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala +++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala @@ -27,6 +27,9 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit} +import org.apache.spark.internal.config +import org.apache.spark.SparkContext + /** * A general format for reading whole files in as streams, byte arrays, * or other functions to be added @@ -40,9 +43,14 @@ private[spark] abstract class StreamFileInputFormat[T] * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API * which is set through setMaxSplitSize */ - def setMinPartitions(context: JobContext, minPartitions: Int) { -val totalLen = listStatus(context).asScala.filterNot(_.isDirectory).map(_.getLen).sum -val maxSplitSize = math.ceil(totalLen / math.max(minPartitions, 1.0)).toLong + def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions: Int) { +val defaultMaxSplitBytes = sc.getConf.get(config.FILES_MAX_PARTITION_BYTES) +val openCostInBytes = sc.getConf.get(config.FILES_OPEN_COST_IN_BYTES) +val defaultParallelism = sc.defaultParallelism +val files = listStatus(context).asScala +val totalBytes = files.filterNot(_.isDirectory).map(_.getLen + openCostInBytes).sum +val bytesPerCore = totalBytes / defaultParallelism +val maxSplitSize = Math.min(defaultMaxSplitBytes, Math.max(openCostInBytes, bytesPerCore)) super.setMaxSplitSize(maxSplitSize) } http://git-wip-us.apache.org/repos/asf/spark/blob/6f369713/core/src/main/scala/org/apache/spark/internal/config/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala 
index 497ca92..4a3e3d5 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -206,4 +206,17 @@ package object config { "encountering corrupt files and contents that have been read will still be returned.") .booleanConf .createWithDefault(false) + + private[spark] val FILES_MAX_PARTITION_BYTES = ConfigBuilder("spark.files.maxPartitionBytes") +.doc("The maximum number of bytes to pack into a single p
spark git commit: [SPARK-18217][SQL] Disallow creating permanent views based on temporary views or UDFs
Repository: spark Updated Branches: refs/heads/master c1a0c66bd -> 1da64e1fa [SPARK-18217][SQL] Disallow creating permanent views based on temporary views or UDFs ### What changes were proposed in this pull request? Based on the discussion in [SPARK-18209](https://issues.apache.org/jira/browse/SPARK-18209). It doesn't really make sense to create permanent views based on temporary views or temporary UDFs. To disallow the supports and issue the exceptions, this PR needs to detect whether a temporary view/UDF is being used when defining a permanent view. Basically, this PR can be split to two sub-tasks: **Task 1:** detecting a temporary view from the query plan of view definition. When finding an unresolved temporary view, Analyzer replaces it by a `SubqueryAlias` with the corresponding logical plan, which is stored in an in-memory HashMap. After replacement, it is impossible to detect whether the `SubqueryAlias` is added/generated from a temporary view. Thus, to detect the usage of a temporary view in view definition, this PR traverses the unresolved logical plan and uses the name of an `UnresolvedRelation` to detect whether it is a (global) temporary view. **Task 2:** detecting a temporary UDF from the query plan of view definition. Detecting usage of a temporary UDF in view definition is not straightfoward. First, in the analyzed plan, we are having different forms to represent the functions. More importantly, some classes (e.g., `HiveGenericUDF`) are not accessible from `CreateViewCommand`, which is part of `sql/core`. Thus, we used the unanalyzed plan `child` of `CreateViewCommand` to detect the usage of a temporary UDF. Because the plan has already been successfully analyzed, we can assume the functions have been defined/registered. Second, in Spark, the functions have four forms: Spark built-in functions, built-in hash functions, permanent UDFs and temporary UDFs. We do not have any direct way to determine whether a function is temporary or not. 
Thus, we introduced a function `isTemporaryFunction` in `SessionCatalog`. This function contains the detailed logics to determine whether a function is temporary or not. ### How was this patch tested? Added test cases. Author: gatorsmileCloses #15764 from gatorsmile/blockTempFromPermViewCreation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1da64e1f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1da64e1f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1da64e1f Branch: refs/heads/master Commit: 1da64e1fa0970277d1fb47dec8adca47b068b1ec Parents: c1a0c66 Author: gatorsmile Authored: Mon Nov 7 18:34:21 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 18:34:21 2016 -0800 -- .../sql/catalyst/catalog/SessionCatalog.scala | 18 .../catalyst/catalog/SessionCatalogSuite.scala | 28 ++ .../spark/sql/execution/command/views.scala | 38 +++- .../spark/sql/hive/HiveSessionCatalog.scala | 1 + .../spark/sql/hive/execution/SQLViewSuite.scala | 99 ++-- 5 files changed, 172 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1da64e1f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 2d2120d..c8b61d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -923,6 +923,24 @@ class SessionCatalog( } } + /** + * Returns whether it is a temporary function. If not existed, returns false. 
+ */ + def isTemporaryFunction(name: FunctionIdentifier): Boolean = { +// copied from HiveSessionCatalog +val hiveFunctions = Seq( + "hash", + "histogram_numeric", + "percentile") + +// A temporary function is a function that has been registered in functionRegistry +// without a database name, and is neither a built-in function nor a Hive function +name.database.isEmpty && + functionRegistry.functionExists(name.funcName) && + !FunctionRegistry.builtin.functionExists(name.funcName) && + !hiveFunctions.contains(name.funcName.toLowerCase) + } + protected def failFunctionLookup(name: String): Nothing = { throw new NoSuchFunctionException(db = currentDb, func = name) }
spark git commit: [SPARK-18217][SQL] Disallow creating permanent views based on temporary views or UDFs
Repository: spark Updated Branches: refs/heads/branch-2.1 4943929d8 -> 4cb4e5ff0 [SPARK-18217][SQL] Disallow creating permanent views based on temporary views or UDFs ### What changes were proposed in this pull request? Based on the discussion in [SPARK-18209](https://issues.apache.org/jira/browse/SPARK-18209). It doesn't really make sense to create permanent views based on temporary views or temporary UDFs. To disallow the supports and issue the exceptions, this PR needs to detect whether a temporary view/UDF is being used when defining a permanent view. Basically, this PR can be split to two sub-tasks: **Task 1:** detecting a temporary view from the query plan of view definition. When finding an unresolved temporary view, Analyzer replaces it by a `SubqueryAlias` with the corresponding logical plan, which is stored in an in-memory HashMap. After replacement, it is impossible to detect whether the `SubqueryAlias` is added/generated from a temporary view. Thus, to detect the usage of a temporary view in view definition, this PR traverses the unresolved logical plan and uses the name of an `UnresolvedRelation` to detect whether it is a (global) temporary view. **Task 2:** detecting a temporary UDF from the query plan of view definition. Detecting usage of a temporary UDF in view definition is not straightfoward. First, in the analyzed plan, we are having different forms to represent the functions. More importantly, some classes (e.g., `HiveGenericUDF`) are not accessible from `CreateViewCommand`, which is part of `sql/core`. Thus, we used the unanalyzed plan `child` of `CreateViewCommand` to detect the usage of a temporary UDF. Because the plan has already been successfully analyzed, we can assume the functions have been defined/registered. Second, in Spark, the functions have four forms: Spark built-in functions, built-in hash functions, permanent UDFs and temporary UDFs. We do not have any direct way to determine whether a function is temporary or not. 
Thus, we introduced a function `isTemporaryFunction` in `SessionCatalog`. This function contains the detailed logics to determine whether a function is temporary or not. ### How was this patch tested? Added test cases. Author: gatorsmileCloses #15764 from gatorsmile/blockTempFromPermViewCreation. (cherry picked from commit 1da64e1fa0970277d1fb47dec8adca47b068b1ec) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4cb4e5ff Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4cb4e5ff Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4cb4e5ff Branch: refs/heads/branch-2.1 Commit: 4cb4e5ff0ab9537758bf0b418ddd40dfe9537609 Parents: 4943929 Author: gatorsmile Authored: Mon Nov 7 18:34:21 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 18:34:29 2016 -0800 -- .../sql/catalyst/catalog/SessionCatalog.scala | 18 .../catalyst/catalog/SessionCatalogSuite.scala | 28 ++ .../spark/sql/execution/command/views.scala | 38 +++- .../spark/sql/hive/HiveSessionCatalog.scala | 1 + .../spark/sql/hive/execution/SQLViewSuite.scala | 99 ++-- 5 files changed, 172 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4cb4e5ff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 2d2120d..c8b61d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -923,6 +923,24 @@ class SessionCatalog( } } + /** + * Returns whether it is a temporary function. If not existed, returns false. 
+ */ + def isTemporaryFunction(name: FunctionIdentifier): Boolean = { +// copied from HiveSessionCatalog +val hiveFunctions = Seq( + "hash", + "histogram_numeric", + "percentile") + +// A temporary function is a function that has been registered in functionRegistry +// without a database name, and is neither a built-in function nor a Hive function +name.database.isEmpty && + functionRegistry.functionExists(name.funcName) && + !FunctionRegistry.builtin.functionExists(name.funcName) && + !hiveFunctions.contains(name.funcName.toLowerCase) + } + protected def failFunctionLookup(name: String): Nothing = { throw new NoSuchFunctionException(db = currentDb,
spark git commit: [SPARK-18261][STRUCTURED STREAMING] Add statistics to MemorySink for joining
Repository: spark Updated Branches: refs/heads/branch-2.1 29f59c733 -> 4943929d8 [SPARK-18261][STRUCTURED STREAMING] Add statistics to MemorySink for joining ## What changes were proposed in this pull request? Right now, there is no way to join the output of a memory sink with any table: > UnsupportedOperationException: LeafNode MemoryPlan must implement statistics This patch adds statistics to MemorySink, making joining snapshots of memory streams with tables possible. ## How was this patch tested? Added a test case. Author: Liwei LinCloses #15786 from lw-lin/memory-sink-stat. (cherry picked from commit c1a0c66bd2662bc40f312da474c3b95229fe92d0) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4943929d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4943929d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4943929d Branch: refs/heads/branch-2.1 Commit: 4943929d85a2aaf404c140d2d2589a597f484976 Parents: 29f59c7 Author: Liwei Lin Authored: Mon Nov 7 17:49:24 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 17:49:48 2016 -0800 -- .../spark/sql/execution/streaming/memory.scala | 6 +- .../spark/sql/streaming/MemorySinkSuite.scala | 16 2 files changed, 21 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4943929d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index 48d9791..613c7cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -27,7 +27,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.encoderFor import 
org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -212,4 +212,8 @@ class MemorySink(val schema: StructType, outputMode: OutputMode) extends Sink wi */ case class MemoryPlan(sink: MemorySink, output: Seq[Attribute]) extends LeafNode { def this(sink: MemorySink) = this(sink, sink.schema.toAttributes) + + private val sizePerRow = sink.schema.toAttributes.map(_.dataType.defaultSize).sum + + override def statistics: Statistics = Statistics(sizePerRow * sink.allData.size) } http://git-wip-us.apache.org/repos/asf/spark/blob/4943929d/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala index 310d756..4e9fba9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala @@ -187,6 +187,22 @@ class MemorySinkSuite extends StreamTest with BeforeAndAfter { query.stop() } + test("MemoryPlan statistics") { +implicit val schema = new StructType().add(new StructField("value", IntegerType)) +val sink = new MemorySink(schema, InternalOutputModes.Append) +val plan = new MemoryPlan(sink) + +// Before adding data, check output +checkAnswer(sink.allData, Seq.empty) +assert(plan.statistics.sizeInBytes === 0) + +sink.addBatch(0, 1 to 3) +assert(plan.statistics.sizeInBytes === 12) + +sink.addBatch(1, 4 to 6) +assert(plan.statistics.sizeInBytes === 24) + } + ignore("stress test") { // Ignore the stress test as it takes several minutes to run (0 until 1000).foreach { _ => - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18261][STRUCTURED STREAMING] Add statistics to MemorySink for joining
Repository: spark Updated Branches: refs/heads/master 9b0593d5e -> c1a0c66bd [SPARK-18261][STRUCTURED STREAMING] Add statistics to MemorySink for joining ## What changes were proposed in this pull request? Right now, there is no way to join the output of a memory sink with any table: > UnsupportedOperationException: LeafNode MemoryPlan must implement statistics This patch adds statistics to MemorySink, making joining snapshots of memory streams with tables possible. ## How was this patch tested? Added a test case. Author: Liwei LinCloses #15786 from lw-lin/memory-sink-stat. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1a0c66b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1a0c66b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1a0c66b Branch: refs/heads/master Commit: c1a0c66bd2662bc40f312da474c3b95229fe92d0 Parents: 9b0593d Author: Liwei Lin Authored: Mon Nov 7 17:49:24 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 17:49:24 2016 -0800 -- .../spark/sql/execution/streaming/memory.scala | 6 +- .../spark/sql/streaming/MemorySinkSuite.scala | 16 2 files changed, 21 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c1a0c66b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index 48d9791..613c7cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -27,7 +27,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.encoderFor import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import 
org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -212,4 +212,8 @@ class MemorySink(val schema: StructType, outputMode: OutputMode) extends Sink wi */ case class MemoryPlan(sink: MemorySink, output: Seq[Attribute]) extends LeafNode { def this(sink: MemorySink) = this(sink, sink.schema.toAttributes) + + private val sizePerRow = sink.schema.toAttributes.map(_.dataType.defaultSize).sum + + override def statistics: Statistics = Statistics(sizePerRow * sink.allData.size) } http://git-wip-us.apache.org/repos/asf/spark/blob/c1a0c66b/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala index 310d756..4e9fba9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/MemorySinkSuite.scala @@ -187,6 +187,22 @@ class MemorySinkSuite extends StreamTest with BeforeAndAfter { query.stop() } + test("MemoryPlan statistics") { +implicit val schema = new StructType().add(new StructField("value", IntegerType)) +val sink = new MemorySink(schema, InternalOutputModes.Append) +val plan = new MemoryPlan(sink) + +// Before adding data, check output +checkAnswer(sink.allData, Seq.empty) +assert(plan.statistics.sizeInBytes === 0) + +sink.addBatch(0, 1 to 3) +assert(plan.statistics.sizeInBytes === 12) + +sink.addBatch(1, 4 to 6) +assert(plan.statistics.sizeInBytes === 24) + } + ignore("stress test") { // Ignore the stress test as it takes several minutes to run (0 until 1000).foreach { _ => - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18086] Add support for Hive session vars.
Repository: spark Updated Branches: refs/heads/branch-2.1 4af82d56f -> 29f59c733 [SPARK-18086] Add support for Hive session vars. ## What changes were proposed in this pull request? This adds support for Hive variables: * Makes values set via `spark-sql --hivevar name=value` accessible * Adds `getHiveVar` and `setHiveVar` to the `HiveClient` interface * Adds a SessionVariables trait for sessions like Hive that support variables (including Hive vars) * Adds SessionVariables support to variable substitution * Adds SessionVariables support to the SET command ## How was this patch tested? * Adds a test to all supported Hive versions for accessing Hive variables * Adds HiveVariableSubstitutionSuite Author: Ryan BlueCloses #15738 from rdblue/SPARK-18086-add-hivevar-support. (cherry picked from commit 9b0593d5e99bb919c4abb8d0836a126ec2eaf1d5) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/29f59c73 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/29f59c73 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/29f59c73 Branch: refs/heads/branch-2.1 Commit: 29f59c73301628fb63086660f64fdb5272a312fe Parents: 4af82d5 Author: Ryan Blue Authored: Mon Nov 7 17:36:15 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 17:36:22 2016 -0800 -- .../sql/execution/command/SetCommand.scala | 11 + .../sql/internal/VariableSubstitution.scala | 5 +- .../hive/thriftserver/SparkSQLCLIDriver.scala | 6 ++- .../hive/HiveVariableSubstitutionSuite.scala| 50 4 files changed, 67 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/29f59c73/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index af6def5..dc8d975 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -60,6 +60,13 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm } (keyValueOutput, runFunc) +case Some((key @ SetCommand.VariableName(name), Some(value))) => + val runFunc = (sparkSession: SparkSession) => { +sparkSession.conf.set(name, value) +Seq(Row(key, value)) + } + (keyValueOutput, runFunc) + // Configures a single property. case Some((key, Some(value))) => val runFunc = (sparkSession: SparkSession) => { @@ -117,6 +124,10 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm } +object SetCommand { + val VariableName = """hivevar:([^=]+)""".r +} + /** * This command is for resetting SQLConf to the default values. Command that runs * {{{ http://git-wip-us.apache.org/repos/asf/spark/blob/29f59c73/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala index 50725a0..791a9cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala @@ -17,10 +17,7 @@ package org.apache.spark.sql.internal -import java.util.regex.Pattern - import org.apache.spark.internal.config._ -import org.apache.spark.sql.AnalysisException /** * A helper class that enables substitution using syntax like @@ -37,6 +34,7 @@ class VariableSubstitution(conf: SQLConf) { private val reader = new ConfigReader(provider) .bind("spark", provider) .bind("sparkconf", provider) +.bind("hivevar", provider) .bind("hiveconf", provider) /** @@ -49,5 +47,4 @@ class VariableSubstitution(conf: SQLConf) { input } } - } 
http://git-wip-us.apache.org/repos/asf/spark/blob/29f59c73/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 5dafec1..0c79b6f 100644 ---
spark git commit: [SPARK-18086] Add support for Hive session vars.
Repository: spark Updated Branches: refs/heads/master 3eda05703 -> 9b0593d5e [SPARK-18086] Add support for Hive session vars. ## What changes were proposed in this pull request? This adds support for Hive variables: * Makes values set via `spark-sql --hivevar name=value` accessible * Adds `getHiveVar` and `setHiveVar` to the `HiveClient` interface * Adds a SessionVariables trait for sessions like Hive that support variables (including Hive vars) * Adds SessionVariables support to variable substitution * Adds SessionVariables support to the SET command ## How was this patch tested? * Adds a test to all supported Hive versions for accessing Hive variables * Adds HiveVariableSubstitutionSuite Author: Ryan BlueCloses #15738 from rdblue/SPARK-18086-add-hivevar-support. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b0593d5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b0593d5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b0593d5 Branch: refs/heads/master Commit: 9b0593d5e99bb919c4abb8d0836a126ec2eaf1d5 Parents: 3eda057 Author: Ryan Blue Authored: Mon Nov 7 17:36:15 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 17:36:15 2016 -0800 -- .../sql/execution/command/SetCommand.scala | 11 + .../sql/internal/VariableSubstitution.scala | 5 +- .../hive/thriftserver/SparkSQLCLIDriver.scala | 6 ++- .../hive/HiveVariableSubstitutionSuite.scala| 50 4 files changed, 67 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b0593d5/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index af6def5..dc8d975 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -60,6 +60,13 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm } (keyValueOutput, runFunc) +case Some((key @ SetCommand.VariableName(name), Some(value))) => + val runFunc = (sparkSession: SparkSession) => { +sparkSession.conf.set(name, value) +Seq(Row(key, value)) + } + (keyValueOutput, runFunc) + // Configures a single property. case Some((key, Some(value))) => val runFunc = (sparkSession: SparkSession) => { @@ -117,6 +124,10 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm } +object SetCommand { + val VariableName = """hivevar:([^=]+)""".r +} + /** * This command is for resetting SQLConf to the default values. Command that runs * {{{ http://git-wip-us.apache.org/repos/asf/spark/blob/9b0593d5/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala index 50725a0..791a9cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala @@ -17,10 +17,7 @@ package org.apache.spark.sql.internal -import java.util.regex.Pattern - import org.apache.spark.internal.config._ -import org.apache.spark.sql.AnalysisException /** * A helper class that enables substitution using syntax like @@ -37,6 +34,7 @@ class VariableSubstitution(conf: SQLConf) { private val reader = new ConfigReader(provider) .bind("spark", provider) .bind("sparkconf", provider) +.bind("hivevar", provider) .bind("hiveconf", provider) /** @@ -49,5 +47,4 @@ class VariableSubstitution(conf: SQLConf) { input } } - } 
http://git-wip-us.apache.org/repos/asf/spark/blob/9b0593d5/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 5dafec1..0c79b6f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++
spark git commit: [SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry
Repository: spark Updated Branches: refs/heads/branch-2.1 2fa1a632a -> 410102957 [SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry ### What changes were proposed in this pull request? Currently, the Hive built-in `hash` function is not being used in Spark since Spark 2.0. The public interface does not allow users to unregister the Spark built-in functions. Thus, users will never use Hive's built-in `hash` function. The only exception here is `TestHiveFunctionRegistry`, which allows users to unregister the built-in functions. Thus, we can load Hive's hash function in the test cases. If we disable it, 10+ test cases will fail because the results are different from the Hive golden answer files. This PR is to remove `hash` from the list of `hiveFunctions` in `HiveSessionCatalog`. It will also remove `TestHiveFunctionRegistry`. This removal makes us easier to remove `TestHiveSessionState` in the future. ### How was this patch tested? N/A Author: gatorsmileCloses #14498 from gatorsmile/removeHash. 
(cherry picked from commit 57626a55703a189e03148398f67c36cd0e557044) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41010295 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41010295 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41010295 Branch: refs/heads/branch-2.1 Commit: 4101029579de920215b426ca6537c1f0e4e4e5ae Parents: 2fa1a63 Author: gatorsmile Authored: Mon Nov 7 01:16:37 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 01:16:43 2016 -0800 -- .../hive/execution/HiveCompatibilitySuite.scala | 41 ++-- .../spark/sql/hive/HiveSessionCatalog.scala | 1 - .../apache/spark/sql/hive/test/TestHive.scala | 28 - 3 files changed, 20 insertions(+), 50 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/41010295/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index f5d10de..5cd4935 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5) // Enable in-memory partition pruning for testing purposes TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) -// Use Hive hash expression instead of the native one -TestHive.sessionState.functionRegistry.unregisterFunction("hash") // Ensures that the plans generation use metastore relation and not OrcRelation // Was done because SqlBuilder does not work with plans having logical relation 
TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false) @@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) - TestHive.sessionState.functionRegistry.restore() // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) @@ -581,7 +578,26 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "auto_join6", "auto_join7", "auto_join8", -"auto_join9" +"auto_join9", + +// These tests are based on the Hive's hash function, which is different from Spark +"auto_join19", +"auto_join22", +"auto_join25", +"auto_join26", +"auto_join27", +"auto_join28", +"auto_join30", +"auto_join31", +"auto_join_nulls", +"auto_join_reordering_values", +"correlationoptimizer1", +"correlationoptimizer2", +"correlationoptimizer3", +"correlationoptimizer4", +"multiMapJoin1", +"orc_dictionary_threshold", +"udf_hash" ) /** @@ -601,16 +617,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "annotate_stats_part", "annotate_stats_table", "annotate_stats_union", -"auto_join19", -"auto_join22", -"auto_join25", -
spark git commit: [SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry
Repository: spark Updated Branches: refs/heads/master 9db06c442 -> 57626a557 [SPARK-16904][SQL] Removal of Hive Built-in Hash Functions and TestHiveFunctionRegistry ### What changes were proposed in this pull request? Currently, the Hive built-in `hash` function is not being used in Spark since Spark 2.0. The public interface does not allow users to unregister the Spark built-in functions. Thus, users will never use Hive's built-in `hash` function. The only exception here is `TestHiveFunctionRegistry`, which allows users to unregister the built-in functions. Thus, we can load Hive's hash function in the test cases. If we disable it, 10+ test cases will fail because the results are different from the Hive golden answer files. This PR is to remove `hash` from the list of `hiveFunctions` in `HiveSessionCatalog`. It will also remove `TestHiveFunctionRegistry`. This removal makes us easier to remove `TestHiveSessionState` in the future. ### How was this patch tested? N/A Author: gatorsmileCloses #14498 from gatorsmile/removeHash. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57626a55 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57626a55 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57626a55 Branch: refs/heads/master Commit: 57626a55703a189e03148398f67c36cd0e557044 Parents: 9db06c4 Author: gatorsmile Authored: Mon Nov 7 01:16:37 2016 -0800 Committer: Reynold Xin Committed: Mon Nov 7 01:16:37 2016 -0800 -- .../hive/execution/HiveCompatibilitySuite.scala | 41 ++-- .../spark/sql/hive/HiveSessionCatalog.scala | 1 - .../apache/spark/sql/hive/test/TestHive.scala | 28 - 3 files changed, 20 insertions(+), 50 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/57626a55/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index f5d10de..5cd4935 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5) // Enable in-memory partition pruning for testing purposes TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) -// Use Hive hash expression instead of the native one -TestHive.sessionState.functionRegistry.unregisterFunction("hash") // Ensures that the plans generation use metastore relation and not OrcRelation // Was done because SqlBuilder does not work with plans having logical relation TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false) @@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest 
with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) - TestHive.sessionState.functionRegistry.restore() // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) @@ -581,7 +578,26 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "auto_join6", "auto_join7", "auto_join8", -"auto_join9" +"auto_join9", + +// These tests are based on the Hive's hash function, which is different from Spark +"auto_join19", +"auto_join22", +"auto_join25", +"auto_join26", +"auto_join27", +"auto_join28", +"auto_join30", +"auto_join31", +"auto_join_nulls", +"auto_join_reordering_values", +"correlationoptimizer1", +"correlationoptimizer2", +"correlationoptimizer3", +"correlationoptimizer4", +"multiMapJoin1", +"orc_dictionary_threshold", +"udf_hash" ) /** @@ -601,16 +617,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "annotate_stats_part", "annotate_stats_table", "annotate_stats_union", -"auto_join19", -"auto_join22", -"auto_join25", -"auto_join26", -"auto_join27", -"auto_join28", -"auto_join30", -"auto_join31", -"auto_join_nulls", -
[2/2] spark git commit: [SPARK-18296][SQL] Use consistent naming for expression test suites
[SPARK-18296][SQL] Use consistent naming for expression test suites ## What changes were proposed in this pull request? We have an undocumented naming convention to call expression unit tests ExpressionsSuite, and the end-to-end tests FunctionsSuite. It'd be great to make all test suites consistent with this naming convention. ## How was this patch tested? This is a test-only naming change. Author: Reynold Xin <r...@databricks.com> Closes #15793 from rxin/SPARK-18296. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9db06c44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9db06c44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9db06c44 Branch: refs/heads/master Commit: 9db06c442cf85e41d51c7b167817f4e7971bf0da Parents: 07ac3f0 Author: Reynold Xin <r...@databricks.com> Authored: Sun Nov 6 22:44:55 2016 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Sun Nov 6 22:44:55 2016 -0800 -- .../expressions/BitwiseExpressionsSuite.scala | 134 + .../expressions/BitwiseFunctionsSuite.scala | 134 - .../CollectionExpressionsSuite.scala| 108 .../expressions/CollectionFunctionsSuite.scala | 109 .../expressions/MathExpressionsSuite.scala | 582 +++ .../expressions/MathFunctionsSuite.scala| 582 --- .../expressions/MiscExpressionsSuite.scala | 42 ++ .../expressions/MiscFunctionsSuite.scala| 42 -- .../expressions/NullExpressionsSuite.scala | 136 + .../expressions/NullFunctionsSuite.scala| 136 - .../apache/spark/sql/MathExpressionsSuite.scala | 424 -- .../apache/spark/sql/MathFunctionsSuite.scala | 424 ++ 12 files changed, 1426 insertions(+), 1427 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9db06c44/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala new file mode 100644 index 000..4188dad --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types._ + + +class BitwiseExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + + import IntegralLiteralTestUtils._ + + test("BitwiseNOT") { +def check(input: Any, expected: Any): Unit = { + val expr = BitwiseNot(Literal(input)) + assert(expr.dataType === Literal(input).dataType) + checkEvaluation(expr, expected) +} + +// Need the extra toByte even though IntelliJ thought it's not needed. 
+check(1.toByte, (~1.toByte).toByte) +check(1000.toShort, (~1000.toShort).toShort) +check(100, ~100) +check(123456789123L, ~123456789123L) + +checkEvaluation(BitwiseNot(Literal.create(null, IntegerType)), null) +checkEvaluation(BitwiseNot(positiveShortLit), (~positiveShort).toShort) +checkEvaluation(BitwiseNot(negativeShortLit), (~negativeShort).toShort) +checkEvaluation(BitwiseNot(positiveIntLit), ~positiveInt) +checkEvaluation(BitwiseNot(negativeIntLit), ~negativeInt) +checkEvaluation(BitwiseNot(positiveLongLit), ~positiveLong) +checkEvaluation(BitwiseNot(negativeLongLit), ~negativeLong) + +DataTypeTestUtils.integralType.foreach { dt => + checkConsistencyBetweenInterpretedAndCodegen(BitwiseNot, dt) +} + } + + test("BitwiseAnd") { +def check(input1: Any, input2: Any, expected: Any): Unit = { + val expr = BitwiseAnd(Literal(input1), Literal(input2)) + assert(expr.
[2/2] spark git commit: [SPARK-18296][SQL] Use consistent naming for expression test suites
[SPARK-18296][SQL] Use consistent naming for expression test suites ## What changes were proposed in this pull request? We have an undocumented naming convention to call expression unit tests ExpressionsSuite, and the end-to-end tests FunctionsSuite. It'd be great to make all test suites consistent with this naming convention. ## How was this patch tested? This is a test-only naming change. Author: Reynold Xin <r...@databricks.com> Closes #15793 from rxin/SPARK-18296. (cherry picked from commit 9db06c442cf85e41d51c7b167817f4e7971bf0da) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2fa1a632 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2fa1a632 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2fa1a632 Branch: refs/heads/branch-2.1 Commit: 2fa1a632ae4e68ffa01fad0d6150219c13355724 Parents: 9ebd5e5 Author: Reynold Xin <r...@databricks.com> Authored: Sun Nov 6 22:44:55 2016 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Sun Nov 6 22:45:02 2016 -0800 -- .../expressions/BitwiseExpressionsSuite.scala | 134 + .../expressions/BitwiseFunctionsSuite.scala | 134 - .../CollectionExpressionsSuite.scala| 108 .../expressions/CollectionFunctionsSuite.scala | 109 .../expressions/MathExpressionsSuite.scala | 582 +++ .../expressions/MathFunctionsSuite.scala| 582 --- .../expressions/MiscExpressionsSuite.scala | 42 ++ .../expressions/MiscFunctionsSuite.scala| 42 -- .../expressions/NullExpressionsSuite.scala | 136 + .../expressions/NullFunctionsSuite.scala| 136 - .../apache/spark/sql/MathExpressionsSuite.scala | 424 -- .../apache/spark/sql/MathFunctionsSuite.scala | 424 ++ 12 files changed, 1426 insertions(+), 1427 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2fa1a632/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala -- diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala new file mode 100644 index 000..4188dad --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types._ + + +class BitwiseExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + + import IntegralLiteralTestUtils._ + + test("BitwiseNOT") { +def check(input: Any, expected: Any): Unit = { + val expr = BitwiseNot(Literal(input)) + assert(expr.dataType === Literal(input).dataType) + checkEvaluation(expr, expected) +} + +// Need the extra toByte even though IntelliJ thought it's not needed. 
+check(1.toByte, (~1.toByte).toByte) +check(1000.toShort, (~1000.toShort).toShort) +check(100, ~100) +check(123456789123L, ~123456789123L) + +checkEvaluation(BitwiseNot(Literal.create(null, IntegerType)), null) +checkEvaluation(BitwiseNot(positiveShortLit), (~positiveShort).toShort) +checkEvaluation(BitwiseNot(negativeShortLit), (~negativeShort).toShort) +checkEvaluation(BitwiseNot(positiveIntLit), ~positiveInt) +checkEvaluation(BitwiseNot(negativeIntLit), ~negativeInt) +checkEvaluation(BitwiseNot(positiveLongLit), ~positiveLong) +checkEvaluation(BitwiseNot(negativeLongLit), ~negativeLong) + +DataTypeTestUtils.integralType.foreach { dt => + checkConsistencyBetweenInterpretedAndCodegen(BitwiseNot, dt) +} + } + + test("BitwiseAnd") { +def check
[1/2] spark git commit: [SPARK-18296][SQL] Use consistent naming for expression test suites
Repository: spark Updated Branches: refs/heads/branch-2.1 9ebd5e563 -> 2fa1a632a http://git-wip-us.apache.org/repos/asf/spark/blob/2fa1a632/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala deleted file mode 100644 index 62c9ab3..000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.expressions - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer -import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} -import org.apache.spark.sql.types._ - -class NullFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { - - def testAllTypes(testFunc: (Any, DataType) => Unit): Unit = { -testFunc(false, BooleanType) -testFunc(1.toByte, ByteType) -testFunc(1.toShort, ShortType) -testFunc(1, IntegerType) -testFunc(1L, LongType) -testFunc(1.0F, FloatType) -testFunc(1.0, DoubleType) -testFunc(Decimal(1.5), DecimalType(2, 1)) -testFunc(new java.sql.Date(10), DateType) -testFunc(new java.sql.Timestamp(10), TimestampType) -testFunc("abcd", StringType) - } - - test("isnull and isnotnull") { -testAllTypes { (value: Any, tpe: DataType) => - checkEvaluation(IsNull(Literal.create(value, tpe)), false) - checkEvaluation(IsNotNull(Literal.create(value, tpe)), true) - checkEvaluation(IsNull(Literal.create(null, tpe)), true) - checkEvaluation(IsNotNull(Literal.create(null, tpe)), false) -} - } - - test("AssertNotNUll") { -val ex = intercept[RuntimeException] { - evaluate(AssertNotNull(Literal(null), Seq.empty[String])) -}.getMessage -assert(ex.contains("Null value appeared in non-nullable field")) - } - - test("IsNaN") { -checkEvaluation(IsNaN(Literal(Double.NaN)), true) -checkEvaluation(IsNaN(Literal(Float.NaN)), true) -checkEvaluation(IsNaN(Literal(math.log(-3))), true) -checkEvaluation(IsNaN(Literal.create(null, DoubleType)), false) -checkEvaluation(IsNaN(Literal(Double.PositiveInfinity)), false) -checkEvaluation(IsNaN(Literal(Float.MaxValue)), false) -checkEvaluation(IsNaN(Literal(5.5f)), false) - } - - test("nanvl") { -checkEvaluation(NaNvl(Literal(5.0), Literal.create(null, DoubleType)), 5.0) -checkEvaluation(NaNvl(Literal.create(null, DoubleType), Literal(5.0)), null) 
-checkEvaluation(NaNvl(Literal.create(null, DoubleType), Literal(Double.NaN)), null) -checkEvaluation(NaNvl(Literal(Double.NaN), Literal(5.0)), 5.0) -checkEvaluation(NaNvl(Literal(Double.NaN), Literal.create(null, DoubleType)), null) -assert(NaNvl(Literal(Double.NaN), Literal(Double.NaN)). - eval(EmptyRow).asInstanceOf[Double].isNaN) - } - - test("coalesce") { -testAllTypes { (value: Any, tpe: DataType) => - val lit = Literal.create(value, tpe) - val nullLit = Literal.create(null, tpe) - checkEvaluation(Coalesce(Seq(nullLit)), null) - checkEvaluation(Coalesce(Seq(lit)), value) - checkEvaluation(Coalesce(Seq(nullLit, lit)), value) - checkEvaluation(Coalesce(Seq(nullLit, lit, lit)), value) - checkEvaluation(Coalesce(Seq(nullLit, nullLit, lit)), value) -} - } - - test("SPARK-16602 Nvl should support numeric-string cases") { -def analyze(expr: Expression): Expression = { - val relation = LocalRelation() - SimpleAnalyzer.execute(Project(Seq(Alias(expr, "c")()), relation)).expressions.head -} - -val intLit = Literal.create(1, IntegerType) -val doubleLit = Literal.create(2.2, DoubleType) -val stringLit = Literal.create("c", StringType) -val nullLit = Literal.create(null, NullType) - -assert(analyze(new Nvl(intLit, doubleLit)).dataType ==
[1/2] spark git commit: [SPARK-18296][SQL] Use consistent naming for expression test suites
Repository: spark Updated Branches: refs/heads/master 07ac3f09d -> 9db06c442 http://git-wip-us.apache.org/repos/asf/spark/blob/9db06c44/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala deleted file mode 100644 index 62c9ab3..000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullFunctionsSuite.scala +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.expressions - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer -import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} -import org.apache.spark.sql.types._ - -class NullFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { - - def testAllTypes(testFunc: (Any, DataType) => Unit): Unit = { -testFunc(false, BooleanType) -testFunc(1.toByte, ByteType) -testFunc(1.toShort, ShortType) -testFunc(1, IntegerType) -testFunc(1L, LongType) -testFunc(1.0F, FloatType) -testFunc(1.0, DoubleType) -testFunc(Decimal(1.5), DecimalType(2, 1)) -testFunc(new java.sql.Date(10), DateType) -testFunc(new java.sql.Timestamp(10), TimestampType) -testFunc("abcd", StringType) - } - - test("isnull and isnotnull") { -testAllTypes { (value: Any, tpe: DataType) => - checkEvaluation(IsNull(Literal.create(value, tpe)), false) - checkEvaluation(IsNotNull(Literal.create(value, tpe)), true) - checkEvaluation(IsNull(Literal.create(null, tpe)), true) - checkEvaluation(IsNotNull(Literal.create(null, tpe)), false) -} - } - - test("AssertNotNUll") { -val ex = intercept[RuntimeException] { - evaluate(AssertNotNull(Literal(null), Seq.empty[String])) -}.getMessage -assert(ex.contains("Null value appeared in non-nullable field")) - } - - test("IsNaN") { -checkEvaluation(IsNaN(Literal(Double.NaN)), true) -checkEvaluation(IsNaN(Literal(Float.NaN)), true) -checkEvaluation(IsNaN(Literal(math.log(-3))), true) -checkEvaluation(IsNaN(Literal.create(null, DoubleType)), false) -checkEvaluation(IsNaN(Literal(Double.PositiveInfinity)), false) -checkEvaluation(IsNaN(Literal(Float.MaxValue)), false) -checkEvaluation(IsNaN(Literal(5.5f)), false) - } - - test("nanvl") { -checkEvaluation(NaNvl(Literal(5.0), Literal.create(null, DoubleType)), 5.0) -checkEvaluation(NaNvl(Literal.create(null, DoubleType), Literal(5.0)), null) 
-checkEvaluation(NaNvl(Literal.create(null, DoubleType), Literal(Double.NaN)), null) -checkEvaluation(NaNvl(Literal(Double.NaN), Literal(5.0)), 5.0) -checkEvaluation(NaNvl(Literal(Double.NaN), Literal.create(null, DoubleType)), null) -assert(NaNvl(Literal(Double.NaN), Literal(Double.NaN)). - eval(EmptyRow).asInstanceOf[Double].isNaN) - } - - test("coalesce") { -testAllTypes { (value: Any, tpe: DataType) => - val lit = Literal.create(value, tpe) - val nullLit = Literal.create(null, tpe) - checkEvaluation(Coalesce(Seq(nullLit)), null) - checkEvaluation(Coalesce(Seq(lit)), value) - checkEvaluation(Coalesce(Seq(nullLit, lit)), value) - checkEvaluation(Coalesce(Seq(nullLit, lit, lit)), value) - checkEvaluation(Coalesce(Seq(nullLit, nullLit, lit)), value) -} - } - - test("SPARK-16602 Nvl should support numeric-string cases") { -def analyze(expr: Expression): Expression = { - val relation = LocalRelation() - SimpleAnalyzer.execute(Project(Seq(Alias(expr, "c")()), relation)).expressions.head -} - -val intLit = Literal.create(1, IntegerType) -val doubleLit = Literal.create(2.2, DoubleType) -val stringLit = Literal.create("c", StringType) -val nullLit = Literal.create(null, NullType) - -assert(analyze(new Nvl(intLit, doubleLit)).dataType ==
spark git commit: [SPARK-18167][SQL] Disable flaky hive partition pruning test.
Repository: spark Updated Branches: refs/heads/branch-2.1 9c78d355c -> 9ebd5e563 [SPARK-18167][SQL] Disable flaky hive partition pruning test. (cherry picked from commit 07ac3f09daf2b28436bc69f76badd1e36d756e4d) Signed-off-by: Reynold XinProject: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ebd5e56 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ebd5e56 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ebd5e56 Branch: refs/heads/branch-2.1 Commit: 9ebd5e563d26cf42b9d32e8926de109101360d43 Parents: 9c78d35 Author: Reynold Xin Authored: Sun Nov 6 22:42:05 2016 -0800 Committer: Reynold Xin Committed: Sun Nov 6 22:43:37 2016 -0800 -- .../scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ebd5e56/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 28e5dff..5e08ef3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1569,7 +1569,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { ).map(i => Row(i._1, i._2, i._3, i._4))) } - test("SPARK-10562: partition by column with mixed case name") { + ignore("SPARK-10562: partition by column with mixed case name") { withTable("tbl10562") { val df = Seq(2012 -> "a").toDF("Year", "val") df.write.partitionBy("Year").saveAsTable("tbl10562") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18167][SQL] Disable flaky hive partition pruning test.
Repository: spark Updated Branches: refs/heads/master 46b2e4999 -> 07ac3f09d [SPARK-18167][SQL] Disable flaky hive partition pruning test. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/07ac3f09 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/07ac3f09 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/07ac3f09 Branch: refs/heads/master Commit: 07ac3f09daf2b28436bc69f76badd1e36d756e4d Parents: 46b2e49 Author: Reynold XinAuthored: Sun Nov 6 22:42:05 2016 -0800 Committer: Reynold Xin Committed: Sun Nov 6 22:42:05 2016 -0800 -- .../scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/07ac3f09/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 28e5dff..5e08ef3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1569,7 +1569,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { ).map(i => Row(i._1, i._2, i._3, i._4))) } - test("SPARK-10562: partition by column with mixed case name") { + ignore("SPARK-10562: partition by column with mixed case name") { withTable("tbl10562") { val df = Seq(2012 -> "a").toDF("Year", "val") df.write.partitionBy("Year").saveAsTable("tbl10562") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v2.0.2-rc1 [deleted] 1c2908eeb - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.3 [created] 1e8607474 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.3-rc1 [deleted] 7375bb0c8 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.3-rc2 [deleted] 1e8607474 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18173][SQL] data source tables should support truncating partition
Repository: spark Updated Branches: refs/heads/branch-2.1 a8fbcdbf2 -> 9c78d355c [SPARK-18173][SQL] data source tables should support truncating partition ## What changes were proposed in this pull request? Previously `TRUNCATE TABLE ... PARTITION` will always truncate the whole table for data source tables, this PR fixes it and improve `InMemoryCatalog` to make this command work with it. ## How was this patch tested? existing tests Author: Wenchen FanCloses #15688 from cloud-fan/truncate. (cherry picked from commit 46b2e499935386e28899d860110a6ab16c107c0c) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9c78d355 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9c78d355 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9c78d355 Branch: refs/heads/branch-2.1 Commit: 9c78d355c541c2abfb4945e5d67bf0d2ba4b4d16 Parents: a8fbcdb Author: Wenchen Fan Authored: Sun Nov 6 18:57:13 2016 -0800 Committer: Reynold Xin Committed: Sun Nov 6 18:57:25 2016 -0800 -- .../sql/catalyst/catalog/InMemoryCatalog.scala | 23 +-- .../catalyst/catalog/ExternalCatalogSuite.scala | 11 .../spark/sql/execution/command/tables.scala| 16 +++-- .../spark/sql/execution/command/DDLSuite.scala | 49 --- .../spark/sql/hive/execution/HiveDDLSuite.scala | 64 5 files changed, 146 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9c78d355/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index bc39688..20db81e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -487,11 +487,26 @@ class 
InMemoryCatalog( table: String, partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = synchronized { requireTableExists(db, table) -if (partialSpec.nonEmpty) { - throw new UnsupportedOperationException( -"listPartition with partial partition spec is not implemented") + +partialSpec match { + case None => catalog(db).tables(table).partitions.values.toSeq + case Some(partial) => +catalog(db).tables(table).partitions.toSeq.collect { + case (spec, partition) if isPartialPartitionSpec(partial, spec) => partition +} +} + } + + /** + * Returns true if `spec1` is a partial partition spec w.r.t. `spec2`, e.g. PARTITION (a=1) is a + * partial partition spec w.r.t. PARTITION (a=1,b=2). + */ + private def isPartialPartitionSpec( + spec1: TablePartitionSpec, + spec2: TablePartitionSpec): Boolean = { +spec1.forall { + case (partitionColumn, value) => spec2(partitionColumn) == value } -catalog(db).tables(table).partitions.values.toSeq } override def listPartitionsByFilter( http://git-wip-us.apache.org/repos/asf/spark/blob/9c78d355/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index 66f92d1..34bdfc8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -320,6 +320,17 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac catalog.createPartitions("db2", "tbl2", Seq(part1), ignoreIfExists = true) } + test("list partitions with partial partition spec") { +val catalog = newBasicCatalog() +val parts = catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "1"))) +assert(parts.length == 1) +assert(parts.head.spec == part1.spec) + +// if 
no partition is matched for the given partition spec, an empty list should be returned. +assert(catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "unknown", "b" -> "1"))).isEmpty) +assert(catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "unknown"))).isEmpty) + } + test("drop partitions") { val catalog = newBasicCatalog() assert(catalogPartitionsEqual(catalog, "db2",
spark git commit: [SPARK-18173][SQL] data source tables should support truncating partition
Repository: spark Updated Branches: refs/heads/master 556a3b7d0 -> 46b2e4999 [SPARK-18173][SQL] data source tables should support truncating partition ## What changes were proposed in this pull request? Previously `TRUNCATE TABLE ... PARTITION` will always truncate the whole table for data source tables, this PR fixes it and improve `InMemoryCatalog` to make this command work with it. ## How was this patch tested? existing tests Author: Wenchen FanCloses #15688 from cloud-fan/truncate. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46b2e499 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46b2e499 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46b2e499 Branch: refs/heads/master Commit: 46b2e499935386e28899d860110a6ab16c107c0c Parents: 556a3b7 Author: Wenchen Fan Authored: Sun Nov 6 18:57:13 2016 -0800 Committer: Reynold Xin Committed: Sun Nov 6 18:57:13 2016 -0800 -- .../sql/catalyst/catalog/InMemoryCatalog.scala | 23 +-- .../catalyst/catalog/ExternalCatalogSuite.scala | 11 .../spark/sql/execution/command/tables.scala| 16 +++-- .../spark/sql/execution/command/DDLSuite.scala | 49 --- .../spark/sql/hive/execution/HiveDDLSuite.scala | 64 5 files changed, 146 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/46b2e499/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index bc39688..20db81e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -487,11 +487,26 @@ class InMemoryCatalog( table: String, partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = 
synchronized { requireTableExists(db, table) -if (partialSpec.nonEmpty) { - throw new UnsupportedOperationException( -"listPartition with partial partition spec is not implemented") + +partialSpec match { + case None => catalog(db).tables(table).partitions.values.toSeq + case Some(partial) => +catalog(db).tables(table).partitions.toSeq.collect { + case (spec, partition) if isPartialPartitionSpec(partial, spec) => partition +} +} + } + + /** + * Returns true if `spec1` is a partial partition spec w.r.t. `spec2`, e.g. PARTITION (a=1) is a + * partial partition spec w.r.t. PARTITION (a=1,b=2). + */ + private def isPartialPartitionSpec( + spec1: TablePartitionSpec, + spec2: TablePartitionSpec): Boolean = { +spec1.forall { + case (partitionColumn, value) => spec2(partitionColumn) == value } -catalog(db).tables(table).partitions.values.toSeq } override def listPartitionsByFilter( http://git-wip-us.apache.org/repos/asf/spark/blob/46b2e499/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index 66f92d1..34bdfc8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -320,6 +320,17 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac catalog.createPartitions("db2", "tbl2", Seq(part1), ignoreIfExists = true) } + test("list partitions with partial partition spec") { +val catalog = newBasicCatalog() +val parts = catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "1"))) +assert(parts.length == 1) +assert(parts.head.spec == part1.spec) + +// if no partition is matched for the given partition spec, an empty list should be returned. 
+assert(catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "unknown", "b" -> "1"))).isEmpty) +assert(catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "unknown"))).isEmpty) + } + test("drop partitions") { val catalog = newBasicCatalog() assert(catalogPartitionsEqual(catalog, "db2", "tbl2", Seq(part1, part2)))
spark git commit: [SPARK-18269][SQL] CSV datasource should read null properly when schema is lager than parsed tokens
Repository: spark Updated Branches: refs/heads/branch-2.1 d2f2cf68a -> a8fbcdbf2 [SPARK-18269][SQL] CSV datasource should read null properly when schema is lager than parsed tokens ## What changes were proposed in this pull request? Currently, there are the three cases when reading CSV by datasource when it is `PERMISSIVE` parse mode. - schema == parsed tokens (from each line) No problem to cast the value in the tokens to the field in the schema as they are equal. - schema < parsed tokens (from each line) It slices the tokens into the number of fields in schema. - schema > parsed tokens (from each line) It appends `null` into parsed tokens so that safely values can be casted with the schema. However, when `null` is appended in the third case, we should take `null` into account when casting the values. In case of `StringType`, it is fine as `UTF8String.fromString(datum)` produces `null` when the input is `null`. Therefore, this case will happen only when schema is explicitly given and schema includes data types that are not `StringType`. The codes below: ```scala val path = "/tmp/a" Seq("1").toDF().write.text(path.getAbsolutePath) val schema = StructType( StructField("a", IntegerType, true) :: StructField("b", IntegerType, true) :: Nil) spark.read.schema(schema).option("header", "false").csv(path).show() ``` prints **Before** ``` java.lang.NumberFormatException: null at java.lang.Integer.parseInt(Integer.java:542) at java.lang.Integer.parseInt(Integer.java:615) at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:272) at scala.collection.immutable.StringOps.toInt(StringOps.scala:29) at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$.castTo(CSVInferSchema.scala:24) ``` **After** ``` +---++ | a| b| +---++ | 1|null| +---++ ``` ## How was this patch tested? Unit test in `CSVSuite.scala` and `CSVTypeCastSuite.scala` Author: hyukjinkwonCloses #15767 from HyukjinKwon/SPARK-18269. 
(cherry picked from commit 556a3b7d07f36c29ceb88fb6c24cc229e0e53ee4) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8fbcdbf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8fbcdbf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8fbcdbf Branch: refs/heads/branch-2.1 Commit: a8fbcdbf252634b1ebc910d8f5e86c16c39167f8 Parents: d2f2cf6 Author: hyukjinkwon Authored: Sun Nov 6 18:52:05 2016 -0800 Committer: Reynold Xin Committed: Sun Nov 6 18:52:18 2016 -0800 -- .../datasources/csv/CSVInferSchema.scala| 17 +++- .../execution/datasources/csv/CSVRelation.scala | 1 + .../execution/datasources/csv/CSVSuite.scala| 15 .../datasources/csv/CSVTypeCastSuite.scala | 93 +++- 4 files changed, 81 insertions(+), 45 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8fbcdbf/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 1981d86..c63aae9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -221,18 +221,27 @@ private[csv] object CSVTypeCast { * Currently we do not support complex types (ArrayType, MapType, StructType). * * For string types, this is simply the datum. For other types. - * For other nullable types, this is null if the string datum is empty. + * For other nullable types, returns null if it is null or equals to the value specified + * in `nullValue` option. * * @param datum string value - * @param castType SparkSQL type + * @param name field name in schema. + * @param castType data type to cast `datum` into. 
+ * @param nullable nullability for the field. + * @param options CSV options. */ def castTo( datum: String, + name: String, castType: DataType, nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { -if (nullable && datum == options.nullValue) { +// datum can be null if the number of fields found is less than the length of the schema +if (datum == options.nullValue || datum == null) { + if (!nullable) { +throw new RuntimeException(s"null value found but field $name is not nullable.") + } null } else { castType match { @@ -281,7 +290,7 @@
[48/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/cacheTable.html -- diff --git a/site/docs/1.6.3/api/R/cacheTable.html b/site/docs/1.6.3/api/R/cacheTable.html new file mode 100644 index 000..514da7c --- /dev/null +++ b/site/docs/1.6.3/api/R/cacheTable.html @@ -0,0 +1,63 @@ + +R: Cache Table + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +cacheTable {SparkR}R Documentation + +Cache Table + +Description + +Caches the specified table in-memory. + + + +Usage + + +cacheTable(sqlContext, tableName) + + + +Arguments + + +sqlContext + +SQLContext to use + +tableName + +The name of the table being cached + + + + +Value + +DataFrame + + + +Examples + +## Not run: +##D sc - sparkR.init() +##D sqlContext - sparkRSQL.init(sc) +##D path - path/to/file.json +##D df - read.json(sqlContext, path) +##D registerTempTable(df, table) +##D cacheTable(sqlContext, table) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/cancelJobGroup.html -- diff --git a/site/docs/1.6.3/api/R/cancelJobGroup.html b/site/docs/1.6.3/api/R/cancelJobGroup.html new file mode 100644 index 000..a3a0d7d --- /dev/null +++ b/site/docs/1.6.3/api/R/cancelJobGroup.html @@ -0,0 +1,53 @@ + +R: Cancel active jobs for the specified group + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +cancelJobGroup {SparkR}R Documentation + +Cancel active jobs for the specified group + +Description + +Cancel active jobs for the specified group + + 
+ +Usage + + +cancelJobGroup(sc, groupId) + + + +Arguments + + +sc + +existing spark context + +groupId + +the ID of job group to be cancelled + + + + +Examples + +## Not run: +##D sc - sparkR.init() +##D cancelJobGroup(sc, myJobGroup) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/cast.html -- diff --git a/site/docs/1.6.3/api/R/cast.html b/site/docs/1.6.3/api/R/cast.html new file mode 100644 index 000..adf5017 --- /dev/null +++ b/site/docs/1.6.3/api/R/cast.html @@ -0,0 +1,48 @@ + +R: Casts the column to a different data type. + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +cast {SparkR}R Documentation + +Casts the column to a different data type. + +Description + +Casts the column to a different data type. 
+ + + +Usage + + +## S4 method for signature 'Column' +cast(x, dataType) + + + +See Also + +Other colum_func: alias; +between; otherwise; +substr + + + +Examples + +## Not run: +##D cast(df$age, string) +##D cast(df$name, list(type=array, elementType=byte, containsNull = TRUE)) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/cbrt.html -- diff --git a/site/docs/1.6.3/api/R/cbrt.html b/site/docs/1.6.3/api/R/cbrt.html new file mode 100644 index 000..7e5ded9 --- /dev/null +++ b/site/docs/1.6.3/api/R/cbrt.html @@ -0,0 +1,70 @@ + +R: cbrt + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +cbrt {SparkR}R Documentation + +cbrt + +Description + +Computes the cube-root of the given value. + + + +Usage + + +## S4 method for signature 'Column' +cbrt(x) + +cbrt(x) + + + +See Also + +Other math_funcs: acos; asin; +atan2; atan; +bin, bin; ceil, +ceil, ceiling; +conv, conv; +corr; cosh; +cos; expm1; +exp; factorial; +floor; hex, +hex; hypot, +hypot; log10; +log1p; log2; +log; pmod, +pmod; rint, +rint; round; +shiftLeft, shiftLeft; +shiftRightUnsigned, +shiftRightUnsigned; +shiftRight, shiftRight; +sign, signum, +signum; sinh; +sin; sqrt; +tanh; tan; +toDegrees, toDegrees; +toRadians, toRadians; +unhex, unhex + + + +Examples + +## Not
[15/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaHadoopRDD.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaHadoopRDD.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaHadoopRDD.html new file mode 100644 index 000..def24cc --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaHadoopRDD.html @@ -0,0 +1,321 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +JavaHadoopRDD (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Class JavaHadoopRDDK,V + + + +Object + + +org.apache.spark.api.java.JavaPairRDDK,V + + +org.apache.spark.api.java.JavaHadoopRDDK,V + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLikescala.Tuple2K,V,JavaPairRDDK,V + + + +public class JavaHadoopRDDK,V +extends JavaPairRDDK,V +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaHadoopRDD(HadoopRDDK,Vrdd, + scala.reflect.ClassTagKkClassTag, + scala.reflect.ClassTagVvClassTag) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +scala.reflect.ClassTagK +kClassTag() + + +RJavaRDDR +mapPartitionsWithInputSplit(Function2org.apache.hadoop.mapred.InputSplit,java.util.Iteratorscala.Tuple2K,V,java.util.IteratorRf, + booleanpreservesPartitioning) +Maps over a partition, providing the InputSplit that was used as the base of the partition. 
+ + + +scala.reflect.ClassTagV +vClassTag() + + + + + + +Methods inherited from classorg.apache.spark.api.java.JavaPairRDD +aggregateByKey, aggregateByKey, aggregateByKey, cache, classTag, coalesce, coalesce, cogroup, cogroup, cogroup, cogroup, cogroup, cogroup, cogroup, cogroup, cogroup, collectAsMap, combineByKey, combineByKey, combineByKey, combineByKey, countApproxDistinctByKey, countApproxDistinctByKey, countApproxDistinctByKey, countByKey, countByKeyApprox, countByKeyApprox, distinct, distinct, filter, first, flatMapValues, foldByKey, foldByKey, foldByKey, fromJavaRDD, fromRDD, fullOuterJoin, fullOuterJoin, fullOuterJoin, groupByKey, groupByKey, groupByKey, groupWith, groupWith, groupWith, intersection, join, join, join, keys, leftOuterJoin, leftOuterJoin, leftOuterJoin, lookup, mapValues, partitionBy, persist, rdd, reduceByKey, reduceByKey, reduceByKey, reduceByKeyLocally, repartition, repartitionAndSortWithinPartitions, repartitionAnd SortWithinPartitions, rightOuterJoin, rightOuterJoin, rightOuterJoin, sample, sample, sampleByKey, sampleByKey, sampleByKeyExact, sampleByKeyExact, saveAsHadoopDataset, saveAsHadoopFile, saveAsHadoopFile, saveAsHadoopFile, saveAsNewAPIHadoopDataset, saveAsNewAPIHadoopFile, saveAsNewAPIHadoopFile, setName, sortByKey, sortByKey, sortByKey, sortByKey, sortByKey, sortByKey, subtract, subtract, subtract, subtractByKey, subtractByKey, subtractByKey, toRDD, union, unpersist, unpersist, values, wrapRDD + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.api.java.JavaRDDLike +aggregate, cartesian, checkpoint, collect, collectAsync, collectPartitions, context, count, countApprox, countApprox, countApproxDistinct, countAsync, countByValue, countByValueApprox, countByValueApprox, flatMap, flatMapToDouble , flatMapToPair, fold, foreach, foreachAsync, foreachPartition, foreachPartitionAsync, getCheck pointFile, 
getNumPartitions, getStorageLevel, glom, groupBy, groupBy, id, isCheckpointed, isEmpty, iterator, keyBy, map, mapPartitions, mapPartitions, mapPartitionsToDouble, mapPartitionsToDouble, mapPartitionsToPair, mapPartitionsToPair, mapPartitionsWithIndex, mapToDouble, mapToPair, max, min, name, partitioner, partitions, pipe, pipe, pipe, reduce, saveAsObjectFile, saveAsTextFile, saveAsTextFile, splits, take, takeAsync, takeOrdered, takeOrdered, takeSample, takeSample, toArray, toDebugString, toLocalIterator, top, top, treeAggregate, treeAggregate,
[34/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/constant-values.html -- diff --git a/site/docs/1.6.3/api/java/constant-values.html b/site/docs/1.6.3/api/java/constant-values.html new file mode 100644 index 000..d193189 --- /dev/null +++ b/site/docs/1.6.3/api/java/constant-values.html @@ -0,0 +1,219 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Constant Field Values (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + +Constant Field Values +Contents + +org.apache.* + + + + + +org.apache.* + + + +org.apache.spark.launcher.SparkLauncher + +Modifier and Type +Constant Field +Value + + + + + +publicstaticfinalString +CHILD_CONNECTION_TIMEOUT +"spark.launcher.childConectionTimeout" + + + + +publicstaticfinalString +CHILD_PROCESS_LOGGER_NAME +"spark.launcher.childProcLoggerName" + + + + +publicstaticfinalString +DRIVER_EXTRA_CLASSPATH +"spark.driver.extraClassPath" + + + + +publicstaticfinalString +DRIVER_EXTRA_JAVA_OPTIONS +"spark.driver.extraJavaOptions" + + + + +publicstaticfinalString +DRIVER_EXTRA_LIBRARY_PATH +"spark.driver.extraLibraryPath" + + + + +publicstaticfinalString +DRIVER_MEMORY +"spark.driver.memory" + + + + +publicstaticfinalString +EXECUTOR_CORES +"spark.executor.cores" + + + + +publicstaticfinalString +EXECUTOR_EXTRA_CLASSPATH +"spark.executor.extraClassPath" + + + + +publicstaticfinalString +EXECUTOR_EXTRA_JAVA_OPTIONS +"spark.executor.extraJavaOptions" + + + + +publicstaticfinalString +EXECUTOR_EXTRA_LIBRARY_PATH +"spark.executor.extraLibraryPath" + + + + +publicstaticfinalString +EXECUTOR_MEMORY +"spark.executor.memory" + + + + +publicstaticfinalString +SPARK_MASTER +"spark.master" + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + 
+ + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/deprecated-list.html -- diff --git a/site/docs/1.6.3/api/java/deprecated-list.html b/site/docs/1.6.3/api/java/deprecated-list.html new file mode 100644 index 000..a7b2377 --- /dev/null +++ b/site/docs/1.6.3/api/java/deprecated-list.html @@ -0,0 +1,584 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Deprecated List (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + +Deprecated API +Contents + +Deprecated Methods + + + + + + + + +Deprecated Methods + +Method and Description + + + +org.apache.spark.streaming.StreamingContext.awaitTermination(long) +As of 1.3.0, replaced by awaitTerminationOrTimeout(Long). + + + +org.apache.spark.streaming.api.java.JavaStreamingContext.awaitTermination(long) +As of 1.3.0, replaced by awaitTerminationOrTimeout(Long). + + + +org.apache.spark.sql.functions.callUDF(Function0, DataType) +As of 1.5.0, since it's redundant with udf() + This will be removed in Spark 2.0. + + + +org.apache.spark.sql.functions.callUDF(Function1, DataType, Column) +As of 1.5.0, since it's redundant with udf() + This will be removed in Spark 2.0. + + + +org.apache.spark.sql.functions.callUDF(Function10, DataType, Column, Column, Column, Column, Column, Column, Column, Column, Column, Column) +As of 1.5.0, since it's redundant with udf(). + This will be removed in Spark 2.0. + + + +org.apache.spark.sql.functions.callUDF(Function2, DataType, Column, Column) +As of 1.5.0, since it's redundant with udf() + This will be removed in Spark 2.0. + + + +org.apache.spark.sql.functions.callUDF(Function3, DataType, Column, Column, Column)
[40/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/shiftLeft.html -- diff --git a/site/docs/1.6.3/api/R/shiftLeft.html b/site/docs/1.6.3/api/R/shiftLeft.html new file mode 100644 index 000..fc7ffd9 --- /dev/null +++ b/site/docs/1.6.3/api/R/shiftLeft.html @@ -0,0 +1,71 @@ + +R: shiftLeft + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +shiftLeft {SparkR}R Documentation + +shiftLeft + +Description + +Shift the the given value numBits left. If the given value is a long value, this function +will return a long value else it will return an integer value. + + + +Usage + + +## S4 method for signature 'Column,numeric' +shiftLeft(y, x) + +shiftLeft(y, x) + + + +See Also + +Other math_funcs: acos; asin; +atan2; atan; +bin, bin; cbrt, +cbrt; ceil, +ceil, ceiling; +conv, conv; +corr; cosh; +cos; expm1; +exp; factorial; +floor; hex, +hex; hypot, +hypot; log10; +log1p; log2; +log; pmod, +pmod; rint, +rint; round; +shiftRightUnsigned, +shiftRightUnsigned; +shiftRight, shiftRight; +sign, signum, +signum; sinh; +sin; sqrt; +tanh; tan; +toDegrees, toDegrees; +toRadians, toRadians; +unhex, unhex + + + +Examples + +## Not run: shiftLeft(df$c, 1) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/shiftRight.html -- diff --git a/site/docs/1.6.3/api/R/shiftRight.html b/site/docs/1.6.3/api/R/shiftRight.html new file mode 100644 index 000..1849cce --- /dev/null +++ b/site/docs/1.6.3/api/R/shiftRight.html @@ -0,0 +1,71 @@ + +R: shiftRight + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> 
+https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +shiftRight {SparkR}R Documentation + +shiftRight + +Description + +Shift the the given value numBits right. If the given value is a long value, it will return +a long value else it will return an integer value. + + + +Usage + + +## S4 method for signature 'Column,numeric' +shiftRight(y, x) + +shiftRight(y, x) + + + +See Also + +Other math_funcs: acos; asin; +atan2; atan; +bin, bin; cbrt, +cbrt; ceil, +ceil, ceiling; +conv, conv; +corr; cosh; +cos; expm1; +exp; factorial; +floor; hex, +hex; hypot, +hypot; log10; +log1p; log2; +log; pmod, +pmod; rint, +rint; round; +shiftLeft, shiftLeft; +shiftRightUnsigned, +shiftRightUnsigned; sign, +signum, signum; +sinh; sin; +sqrt; tanh; +tan; toDegrees, +toDegrees; toRadians, +toRadians; unhex, +unhex + + + +Examples + +## Not run: shiftRight(df$c, 1) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/shiftRightUnsigned.html -- diff --git a/site/docs/1.6.3/api/R/shiftRightUnsigned.html b/site/docs/1.6.3/api/R/shiftRightUnsigned.html new file mode 100644 index 000..63c7f8c --- /dev/null +++ b/site/docs/1.6.3/api/R/shiftRightUnsigned.html @@ -0,0 +1,70 @@ + +R: shiftRightUnsigned + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +shiftRightUnsigned {SparkR}R Documentation + +shiftRightUnsigned + +Description + +Unsigned shift the the given value numBits right. If the given value is a long value, +it will return a long value else it will return an integer value. 
+ + + +Usage + + +## S4 method for signature 'Column,numeric' +shiftRightUnsigned(y, x) + +shiftRightUnsigned(y, x) + + + +See Also + +Other math_funcs: acos; asin; +atan2; atan; +bin, bin; cbrt, +cbrt; ceil, +ceil, ceiling; +conv, conv; +corr; cosh; +cos; expm1; +exp; factorial; +floor; hex, +hex; hypot, +hypot; log10; +log1p; log2; +log; pmod, +pmod; rint, +rint; round; +shiftLeft, shiftLeft; +shiftRight, shiftRight; +sign, signum, +signum; sinh; +sin; sqrt; +tanh; tan; +toDegrees, toDegrees; +toRadians, toRadians; +unhex, unhex + + + +Examples + +## Not run: shiftRightUnsigned(df$c, 1) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/show.html -- diff --git a/site/docs/1.6.3/api/R/show.html b/site/docs/1.6.3/api/R/show.html new file mode 100644 index
[01/51] [partial] spark-website git commit: Add 1.6.3 doc.
Repository: spark-website Updated Branches: refs/heads/asf-site f284a2687 -> 24d32b75d http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/graphx/PartitionStrategy.EdgePartition1D$.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/graphx/PartitionStrategy.EdgePartition1D$.html b/site/docs/1.6.3/api/java/org/apache/spark/graphx/PartitionStrategy.EdgePartition1D$.html new file mode 100644 index 000..0d54c7f --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/graphx/PartitionStrategy.EdgePartition1D$.html @@ -0,0 +1,346 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +PartitionStrategy.EdgePartition1D$ (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.graphx +Class PartitionStrategy.EdgePartition1D$ + + + +Object + + +org.apache.spark.graphx.PartitionStrategy.EdgePartition1D$ + + + + + + + +All Implemented Interfaces: +java.io.Serializable, PartitionStrategy, scala.Equals, scala.Product + + +Enclosing interface: +PartitionStrategy + + + +public static class PartitionStrategy.EdgePartition1D$ +extends Object +implements PartitionStrategy, scala.Product, scala.Serializable +Assigns edges to partitions using only the source vertex ID, colocating edges with the same + source. 
+See Also:Serialized Form + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interfaceorg.apache.spark.graphx.PartitionStrategy +PartitionStrategy.CanonicalRandomVertexCut$, PartitionStrategy.EdgePartition1D$, PartitionStrategy.EdgePartition2D$, PartitionStrategy.RandomVertexCut$ + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static PartitionStrategy.EdgePartition1D$ +MODULE$ +Static reference to the singleton instance of this Scala object. + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +PartitionStrategy.EdgePartition1D$() + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +getPartition(longsrc, +longdst, +intnumParts) +Returns the partition number for a given edge. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static finalPartitionStrategy.EdgePartition1D$ MODULE$ +Static reference to the singleton instance of this Scala object. + + + + + + + + + +Constructor Detail + + + + + +PartitionStrategy.EdgePartition1D$ +publicPartitionStrategy.EdgePartition1D$() + + + + + + + + + +Method Detail + + + + + +getPartition +publicintgetPartition(longsrc, + longdst, + intnumParts) +Description copied from interface:PartitionStrategy +Returns the partition number for a given edge. 
+ +Specified by: +getPartitionin interfacePartitionStrategy + + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[19/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfo.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfo.html b/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfo.html new file mode 100644 index 000..e76754f --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfo.html @@ -0,0 +1,243 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SparkJobInfo (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Interface SparkJobInfo + + + + + + +All Superinterfaces: +java.io.Serializable + + +All Known Implementing Classes: +SparkJobInfoImpl + + + +public interface SparkJobInfo +extends java.io.Serializable +Exposes information about Spark Jobs. + + This interface is not designed to be implemented outside of Spark. We may add additional methods + which may break binary compatibility with outside implementations. 
+ + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +jobId() + + +int[] +stageIds() + + +JobExecutionStatus +status() + + + + + + + + + + + + + + + +Method Detail + + + + + +jobId +intjobId() + + + + + + + +stageIds +int[]stageIds() + + + + + + + +status +JobExecutionStatusstatus() + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfoImpl.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfoImpl.html b/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfoImpl.html new file mode 100644 index 000..eef39e2 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SparkJobInfoImpl.html @@ -0,0 +1,302 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SparkJobInfoImpl (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. 
+ + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class SparkJobInfoImpl + + + +Object + + +org.apache.spark.SparkJobInfoImpl + + + + + + + +All Implemented Interfaces: +java.io.Serializable, SparkJobInfo + + + +public class SparkJobInfoImpl +extends Object +implements SparkJobInfo +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkJobInfoImpl(intjobId, +int[]stageIds, +JobExecutionStatusstatus) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +jobId() + + +int[] +stageIds() + + +JobExecutionStatus +status() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + +SparkJobInfoImpl +publicSparkJobInfoImpl(intjobId, +int[]stageIds, +JobExecutionStatusstatus) + + + + + + + + + +Method Detail + + + + + +jobId +publicintjobId() + +Specified by: +jobIdin interfaceSparkJobInfo + + + + + + + + +stageIds +publicint[]stageIds() + +Specified by: +stageIdsin interfaceSparkJobInfo + + + + + + + + +status +publicJobExecutionStatusstatus() + +Specified by: +statusin
[24/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/RangeDependency.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/RangeDependency.html b/site/docs/1.6.3/api/java/org/apache/spark/RangeDependency.html new file mode 100644 index 000..92e4a7b --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/RangeDependency.html @@ -0,0 +1,297 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +RangeDependency (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class RangeDependencyT + + + +Object + + +org.apache.spark.DependencyT + + +org.apache.spark.NarrowDependencyT + + +org.apache.spark.RangeDependencyT + + + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + + +public class RangeDependencyT +extends NarrowDependencyT +:: DeveloperApi :: + Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs. + param: rdd the parent RDD + param: inStart the start of the range in the parent RDD + param: outStart the start of the range in the child RDD + param: length the length of the range +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +RangeDependency(RDDTrdd, + intinStart, + intoutStart, + intlength) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +scala.collection.immutable.ListObject +getParents(intpartitionId) +Get the parent partitions for a child partition. 
+ + + + + + + +Methods inherited from classorg.apache.spark.NarrowDependency +rdd + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + +RangeDependency +publicRangeDependency(RDDTrdd, + intinStart, + intoutStart, + intlength) + + + + + + + + + +Method Detail + + + + + +getParents +publicscala.collection.immutable.ListObjectgetParents(intpartitionId) +Description copied from class:NarrowDependency +Get the parent partitions for a child partition. + +Specified by: +getParentsin classNarrowDependencyT +Parameters:partitionId - a partition of the child RDD +Returns:the partitions of the parent RDD that the child partition depends upon + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/RangePartitioner.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/RangePartitioner.html b/site/docs/1.6.3/api/java/org/apache/spark/RangePartitioner.html new file mode 100644 index 000..c367f7f --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/RangePartitioner.html @@ -0,0 +1,390 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +RangePartitioner (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class RangePartitionerK,V + + + +Object + + +org.apache.spark.Partitioner + + +org.apache.spark.RangePartitionerK,V +
[29/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html b/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html new file mode 100644 index 000..187492a --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.IntAccumulatorParam$.html @@ -0,0 +1,347 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +AccumulatorParam.IntAccumulatorParam$ (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class AccumulatorParam.IntAccumulatorParam$ + + + +Object + + +org.apache.spark.AccumulatorParam.IntAccumulatorParam$ + + + + + + + +All Implemented Interfaces: +java.io.Serializable, AccumulableParamObject,Object, AccumulatorParamObject + + +Enclosing interface: +AccumulatorParamT + + + +public static class AccumulatorParam.IntAccumulatorParam$ +extends Object +implements AccumulatorParamObject +See Also:Serialized Form + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interfaceorg.apache.spark.AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$, AccumulatorParam.FloatAccumulatorParam$, AccumulatorParam.IntAccumulatorParam$, AccumulatorParam.LongAccumulatorParam$ + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static AccumulatorParam.IntAccumulatorParam$ +MODULE$ +Static reference to the singleton instance of this Scala object. 
+ + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +AccumulatorParam.IntAccumulatorParam$() + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +addInPlace(intt1, + intt2) + + +int +zero(intinitialValue) + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.AccumulatorParam +addAccumulator + + + + + +Methods inherited from interfaceorg.apache.spark.AccumulableParam +addInPlace, zero + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static finalAccumulatorParam.IntAccumulatorParam$ MODULE$ +Static reference to the singleton instance of this Scala object. + + + + + + + + + +Constructor Detail + + + + + +AccumulatorParam.IntAccumulatorParam$ +publicAccumulatorParam.IntAccumulatorParam$() + + + + + + + + + +Method Detail + + + + + +addInPlace +publicintaddInPlace(intt1, + intt2) + + + + + + + +zero +publicintzero(intinitialValue) + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html b/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html new file mode 100644 index 000..9bec6d5 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/AccumulatorParam.LongAccumulatorParam$.html @@ -0,0 +1,347 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +AccumulatorParam.LongAccumulatorParam$ (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is 
disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + +
[13/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDD.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDD.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDD.html new file mode 100644 index 000..73b7920 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDD.html @@ -0,0 +1,757 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +JavaRDD (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Class JavaRDDT + + + +Object + + +org.apache.spark.api.java.JavaRDDT + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLikeT,JavaRDDT + + + +public class JavaRDDT +extends Object +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaRDD(RDDTrdd, + scala.reflect.ClassTagTclassTag) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +JavaRDDT +cache() +Persist this RDD with the default storage level (`MEMORY_ONLY`). + + + +scala.reflect.ClassTagT +classTag() + + +JavaRDDT +coalesce(intnumPartitions) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaRDDT +coalesce(intnumPartitions, +booleanshuffle) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaRDDT +distinct() +Return a new RDD containing the distinct elements in this RDD. + + + +JavaRDDT +distinct(intnumPartitions) +Return a new RDD containing the distinct elements in this RDD. + + + +JavaRDDT +filter(FunctionT,Booleanf) +Return a new RDD containing only the elements that satisfy a predicate. 
+ + + +static TJavaRDDT +fromRDD(RDDTrdd, + scala.reflect.ClassTagTevidence$1) + + +JavaRDDT +intersection(JavaRDDTother) +Return the intersection of this RDD and another one. + + + +JavaRDDT +persist(StorageLevelnewLevel) +Set this RDD's storage level to persist its values across operations after the first time + it is computed. + + + +JavaRDDT[] +randomSplit(double[]weights) +Randomly splits this RDD with the provided weights. + + + +JavaRDDT[] +randomSplit(double[]weights, + longseed) +Randomly splits this RDD with the provided weights. + + + +RDDT +rdd() + + +JavaRDDT +repartition(intnumPartitions) +Return a new RDD that has exactly numPartitions partitions. + + + +JavaRDDT +sample(booleanwithReplacement, + doublefraction) +Return a sampled subset of this RDD. + + + +JavaRDDT +sample(booleanwithReplacement, + doublefraction, + longseed) +Return a sampled subset of this RDD. + + + +JavaRDDT +setName(Stringname) +Assign a name to this RDD + + + +SJavaRDDT +sortBy(FunctionT,Sf, + booleanascending, + intnumPartitions) +Return this RDD sorted by the given key function. + + + +JavaRDDT +subtract(JavaRDDTother) +Return an RDD with the elements from this that are not in other. + + + +JavaRDDT +subtract(JavaRDDTother, +intnumPartitions) +Return an RDD with the elements from this that are not in other. + + + +JavaRDDT +subtract(JavaRDDTother, +Partitionerp) +Return an RDD with the elements from this that are not in other. + + + +static TRDDT +toRDD(JavaRDDTrdd) + + +String +toString() + + +JavaRDDT +union(JavaRDDTother) +Return the union of this RDD and another one. + + + +JavaRDDT +unpersist() +Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. + + + +JavaRDDT +unpersist(booleanblocking) +Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. 
+ + + +JavaRDDT +wrapRDD(RDDTrdd) + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.api.java.JavaRDDLike +aggregate, cartesian, checkpoint, collect, collectAsync, collectPartitions, context, count, countApprox, countApprox, countApproxDistinct, countAsync, countByValue, countByValueApprox, countByValueApprox, first, flatMap, flatMapToDouble, flatMapToPair, fold, foreach, foreachAsync, foreachPartition, foreachPartitionAsync, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, id,
[50/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/README.md -- diff --git a/site/docs/1.6.3/README.md b/site/docs/1.6.3/README.md new file mode 100644 index 000..bcea93e --- /dev/null +++ b/site/docs/1.6.3/README.md @@ -0,0 +1,70 @@ +Welcome to the Spark documentation! + +This readme will walk you through navigating and building the Spark documentation, which is included +here with the Spark source code. You can also find documentation specific to release versions of +Spark at http://spark.apache.org/documentation.html. + +Read on to learn more about viewing documentation in plain text (i.e., markdown) or building the +documentation yourself. Why build it yourself? So that you have the docs that corresponds to +whichever version of Spark you currently have checked out of revision control. + +## Prerequisites +The Spark documentation build uses a number of tools to build HTML docs and API docs in Scala, +Python and R. + +You need to have [Ruby](https://www.ruby-lang.org/en/documentation/installation/) and +[Python](https://docs.python.org/2/using/unix.html#getting-and-installing-the-latest-version-of-python) +installed. Also install the following libraries: +```sh +$ sudo gem install jekyll jekyll-redirect-from pygments.rb +$ sudo pip install Pygments +# Following is needed only for generating API docs +$ sudo pip install sphinx +$ Rscript -e 'install.packages(c("knitr", "devtools"), repos="http://cran.stat.ucla.edu/;)' +``` +## Generating the Documentation HTML + +We include the Spark documentation as part of the source (as opposed to using a hosted wiki, such as +the github wiki, as the definitive documentation) to enable the documentation to evolve along with +the source code and be captured by revision control (currently git). This way the code automatically +includes the version of the documentation that is relevant regardless of which version or release +you have checked out or downloaded. 
+ +In this directory you will find textfiles formatted using Markdown, with an ".md" suffix. You can +read those text files directly if you want. Start with index.md. + +Execute `jekyll build` from the `docs/` directory to compile the site. Compiling the site with +Jekyll will create a directory called `_site` containing index.html as well as the rest of the +compiled files. + +$ cd docs +$ jekyll build + +You can modify the default Jekyll build as follows: +```sh +# Skip generating API docs (which takes a while) +$ SKIP_API=1 jekyll build + +# Serve content locally on port 4000 +$ jekyll serve --watch + +# Build the site with extra features used on the live page +$ PRODUCTION=1 jekyll build +``` + +## API Docs (Scaladoc, Sphinx, roxygen2) + +You can build just the Spark scaladoc by running `build/sbt unidoc` from the SPARK_PROJECT_ROOT directory. + +Similarly, you can build just the PySpark docs by running `make html` from the +SPARK_PROJECT_ROOT/python/docs directory. Documentation is only generated for classes that are listed as +public in `__init__.py`. The SparkR docs can be built by running SPARK_PROJECT_ROOT/R/create-docs.sh. + +When you run `jekyll` in the `docs` directory, it will also copy over the scaladoc for the various +Spark subprojects into the `docs` directory (and then also into the `_site` directory). We use a +jekyll plugin to run `build/sbt unidoc` before building the site so if you haven't run it (recently) it +may take some time as it generates all of the scaladoc. The jekyll plugin also generates the +PySpark docs using [Sphinx](http://sphinx-doc.org/). + +NOTE: To skip the step of building and copying over the Scala, Python, R API docs, run `SKIP_API=1 +jekyll`. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api.html -- diff --git a/site/docs/1.6.3/api.html b/site/docs/1.6.3/api.html new file mode 100644 index 000..ec67acd --- /dev/null +++ b/site/docs/1.6.3/api.html @@ -0,0 +1,180 @@ + + + + + + + + + +Spark API Documentation - Spark 1.6.3 Documentation + + + + + + +body { +padding-top: 60px; +padding-bottom: 40px; +} + + + + + + + + + + + + + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-32518208-2']); + _gaq.push(['_trackPageview']); + + (function() { +var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; +ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; +
[37/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/write.json.html -- diff --git a/site/docs/1.6.3/api/R/write.json.html b/site/docs/1.6.3/api/R/write.json.html new file mode 100644 index 000..9095a09 --- /dev/null +++ b/site/docs/1.6.3/api/R/write.json.html @@ -0,0 +1,140 @@ + +R: write.json + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +write.json {SparkR}R Documentation + +write.json + +Description + +Save the contents of a DataFrame as a JSON file (one object per line). Files written out +with this method can be read back in as a DataFrame using read.json(). + + + +Usage + + +## S4 method for signature 'DataFrame,character' +write.json(x, path) + +write.json(x, path) + + + +Arguments + + +x + +A SparkSQL DataFrame + +path + +The directory where the file is saved + + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; agg, +agg, +count,GroupedData-method, +summarize, summarize; +arrange, arrange, +arrange, orderBy, +orderBy; as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes; +except, except; +explain, explain; +filter, filter, +where, where; +first, first; +groupBy, groupBy, +group_by, group_by; +head; insertInto, +insertInto; intersect, +intersect; isLocal, 
+isLocal; join; +limit, limit; +merge, merge; +mutate, mutate, +transform, transform; +ncol; persist; +printSchema; rbind, +rbind, unionAll, +unionAll; registerTempTable, +registerTempTable; rename, +rename, withColumnRenamed, +withColumnRenamed; +repartition; sample, +sample, sample_frac, +sample_frac; +saveAsParquetFile, +saveAsParquetFile, +write.parquet, write.parquet; +saveAsTable, saveAsTable; +saveDF, saveDF, +write.df, write.df, +write.df; selectExpr; +showDF, showDF; +show, show, +show,GroupedData-method; str; +take; unpersist; +withColumn, withColumn; +write.text, write.text + + + +Examples + +## Not run: +##D sc - sparkR.init() +##D sqlContext - sparkRSQL.init(sc) +##D path - path/to/file.json +##D df - read.json(sqlContext, path) +##D write.json(df, /tmp/sparkr-tmp/) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/write.parquet.html -- diff --git a/site/docs/1.6.3/api/R/write.parquet.html b/site/docs/1.6.3/api/R/write.parquet.html new file mode 100644 index 000..ba2c31d --- /dev/null +++ b/site/docs/1.6.3/api/R/write.parquet.html @@ -0,0 +1,144 @@ + +R: write.parquet + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +write.parquet {SparkR}R Documentation + +write.parquet + +Description + +Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out +with this method can be read back in as a DataFrame using read.parquet(). 
+ + + +Usage + + +## S4 method for signature 'DataFrame,character' +write.parquet(x, path) + +## S4 method for signature 'DataFrame,character' +saveAsParquetFile(x, path) + +write.parquet(x, path) + +saveAsParquetFile(x, path) + + + +Arguments + + +x + +A SparkSQL DataFrame + +path + +The directory where the file is saved + + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; agg, +agg, +count,GroupedData-method, +summarize, summarize; +arrange, arrange, +arrange, orderBy, +orderBy; as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes;
[33/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/index-all.html -- diff --git a/site/docs/1.6.3/api/java/index-all.html b/site/docs/1.6.3/api/java/index-all.html new file mode 100644 index 000..9471651 --- /dev/null +++ b/site/docs/1.6.3/api/java/index-all.html @@ -0,0 +1,25230 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Index (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + +ABCDEFGHIJKLMNOPQRSTUVWYZ_ + + +A + +abs(Column) - Static method in class org.apache.spark.sql.functions + +Computes the absolute value. + +abs() - Method in class org.apache.spark.sql.types.Decimal + +AbsoluteError - Class in org.apache.spark.mllib.tree.loss + +:: DeveloperApi :: + Class for absolute error loss calculation (for regression). + +AbsoluteError() - Constructor for class org.apache.spark.mllib.tree.loss.AbsoluteError + +accessTime() - Method in class org.apache.spark.sql.sources.HadoopFsRelation.FakeFileStatus + +accId() - Method in class org.apache.spark.CleanAccum + +AccumulableR,T - Class in org.apache.spark + +A data type that can be accumulated, ie has an commutative and associative "add" operation, + but where the result type, R, may be different from the element type being added, T. + +Accumulable(R, AccumulableParamR, T, OptionString) - Constructor for class org.apache.spark.Accumulable + +Accumulable(R, AccumulableParamR, T) - Constructor for class org.apache.spark.Accumulable + +accumulable(T, AccumulableParamT, R) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulable shared variable of the given type, to which tasks + can "add" values with add. 
+ +accumulable(T, String, AccumulableParamT, R) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulable shared variable of the given type, to which tasks + can "add" values with add. + +accumulable(R, AccumulableParamR, T) - Method in class org.apache.spark.SparkContext + +Create an Accumulable shared variable, to which tasks can add values + with +=. + +accumulable(R, String, AccumulableParamR, T) - Method in class org.apache.spark.SparkContext + +Create an Accumulable shared variable, with a name for display in the + Spark UI. + +accumulableCollection(R, Function1R, GrowableT, ClassTagR) - Method in class org.apache.spark.SparkContext + +Create an accumulator from a "mutable collection" type. + +AccumulableInfo - Class in org.apache.spark.scheduler + +:: DeveloperApi :: + Information about an Accumulable modified during a task or stage. + +AccumulableInfo - Class in org.apache.spark.status.api.v1 + +AccumulableParamR,T - Interface in org.apache.spark + +Helper object defining how to accumulate values of a particular type. + +accumulables() - Method in class org.apache.spark.scheduler.StageInfo + +Terminal values of accumulables updated during this stage. + +accumulables() - Method in class org.apache.spark.scheduler.TaskInfo + +Intermediate updates to accumulables during this task. + +AccumulatorT - Class in org.apache.spark + +A simpler value of Accumulable where the result type being accumulated is the same + as the types of elements being merged, i.e. + +Accumulator(T, AccumulatorParamT, OptionString) - Constructor for class org.apache.spark.Accumulator + +Accumulator(T, AccumulatorParamT) - Constructor for class org.apache.spark.Accumulator + +accumulator(int) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulator integer variable, which tasks can "add" values + to using the add method. 
+ +accumulator(int, String) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulator integer variable, which tasks can "add" values + to using the add method. + +accumulator(double) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulator double variable, which tasks can "add" values + to using the add method. + +accumulator(double, String) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulator double variable, which tasks can "add" values + to using the add method. + +accumulator(T, AccumulatorParamT) - Method in class org.apache.spark.api.java.JavaSparkContext + +Create an Accumulator variable of a given type, which tasks can "add" + values to using the add method. + +accumulator(T, String, AccumulatorParamT) -
[14/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaPairRDD.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaPairRDD.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaPairRDD.html new file mode 100644 index 000..949ad06 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaPairRDD.html @@ -0,0 +1,2508 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +JavaPairRDD (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Class JavaPairRDDK,V + + + +Object + + +org.apache.spark.api.java.JavaPairRDDK,V + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLikescala.Tuple2K,V,JavaPairRDDK,V + + +Direct Known Subclasses: +JavaHadoopRDD, JavaNewHadoopRDD + + + +public class JavaPairRDDK,V +extends Object +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaPairRDD(RDDscala.Tuple2K,Vrdd, + scala.reflect.ClassTagKkClassTag, + scala.reflect.ClassTagVvClassTag) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +UJavaPairRDDK,U +aggregateByKey(UzeroValue, + Function2U,V,UseqFunc, + Function2U,U,UcombFunc) +Aggregate the values of each key, using given combine functions and a neutral "zero value". + + + +UJavaPairRDDK,U +aggregateByKey(UzeroValue, + intnumPartitions, + Function2U,V,UseqFunc, + Function2U,U,UcombFunc) +Aggregate the values of each key, using given combine functions and a neutral "zero value". 
+ + + +UJavaPairRDDK,U +aggregateByKey(UzeroValue, + Partitionerpartitioner, + Function2U,V,UseqFunc, + Function2U,U,UcombFunc) +Aggregate the values of each key, using given combine functions and a neutral "zero value". + + + +JavaPairRDDK,V +cache() +Persist this RDD with the default storage level (`MEMORY_ONLY`). + + + +scala.reflect.ClassTagscala.Tuple2K,V +classTag() + + +JavaPairRDDK,V +coalesce(intnumPartitions) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaPairRDDK,V +coalesce(intnumPartitions, +booleanshuffle) +Return a new RDD that is reduced into numPartitions partitions. + + + +WJavaPairRDDK,scala.Tuple2IterableV,IterableW +cogroup(JavaPairRDDK,Wother) +For each key k in this or other, return a resulting RDD that contains a tuple with the + list of values for that key in this as well as other. + + + +WJavaPairRDDK,scala.Tuple2IterableV,IterableW +cogroup(JavaPairRDDK,Wother, + intnumPartitions) +For each key k in this or other, return a resulting RDD that contains a tuple with the + list of values for that key in this as well as other. + + + +WJavaPairRDDK,scala.Tuple2IterableV,IterableW +cogroup(JavaPairRDDK,Wother, + Partitionerpartitioner) +For each key k in this or other, return a resulting RDD that contains a tuple with the + list of values for that key in this as well as other. + + + +W1,W2JavaPairRDDK,scala.Tuple3IterableV,IterableW1,IterableW2 +cogroup(JavaPairRDDK,W1other1, + JavaPairRDDK,W2other2) +For each key k in this or other1 or other2, return a resulting RDD that contains a + tuple with the list of values for that key in this, other1 and other2. + + + +W1,W2JavaPairRDDK,scala.Tuple3IterableV,IterableW1,IterableW2 +cogroup(JavaPairRDDK,W1other1, + JavaPairRDDK,W2other2, + intnumPartitions) +For each key k in this or other1 or other2, return a resulting RDD that contains a + tuple with the list of values for that key in this, other1 and other2. 
+ + + +W1,W2,W3JavaPairRDDK,scala.Tuple4IterableV,IterableW1,IterableW2,IterableW3 +cogroup(JavaPairRDDK,W1other1, + JavaPairRDDK,W2other2, + JavaPairRDDK,W3other3) +For each key k in this or other1 or other2 or other3, + return a resulting RDD that contains a tuple with the list of values + for that key in this, other1, other2 and other3. + + + +W1,W2,W3JavaPairRDDK,scala.Tuple4IterableV,IterableW1,IterableW2,IterableW3 +cogroup(JavaPairRDDK,W1other1, + JavaPairRDDK,W2other2, + JavaPairRDDK,W3other3, + intnumPartitions) +For
[10/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html new file mode 100644 index 000..9ea2482 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkStatusTracker.html @@ -0,0 +1,318 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +JavaSparkStatusTracker (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Class JavaSparkStatusTracker + + + +Object + + +org.apache.spark.api.java.JavaSparkStatusTracker + + + + + + + + +public class JavaSparkStatusTracker +extends Object +Low-level status reporting APIs for monitoring job and stage progress. + + These APIs intentionally provide very weak consistency semantics; consumers of these APIs should + be prepared to handle empty / missing information. For example, a job's stage ids may be known + but the status API may not have any information about the details of those stages, so + getStageInfo could potentially return null for a valid stage id. + + To limit memory usage, these APIs only provide information on recent jobs / stages. These APIs + will provide information for the last spark.ui.retainedStages stages and + spark.ui.retainedJobs jobs. + + NOTE: this class's constructor should be considered private and may be subject to change. + + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int[] +getActiveJobIds() +Returns an array containing the ids of all active jobs. 
+ + + +int[] +getActiveStageIds() +Returns an array containing the ids of all active stages. + + + +int[] +getJobIdsForGroup(StringjobGroup) +Return a list of all known jobs in a particular job group. + + + +SparkJobInfo +getJobInfo(intjobId) +Returns job information, or null if the job info could not be found or was garbage collected. + + + +SparkStageInfo +getStageInfo(intstageId) +Returns stage information, or null if the stage info could not be found or was + garbage collected. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Method Detail + + + + + +getJobIdsForGroup +publicint[]getJobIdsForGroup(StringjobGroup) +Return a list of all known jobs in a particular job group. If jobGroup is null, then + returns all known jobs that are not associated with a job group. + + The returned list may contain running, failed, and completed jobs, and may vary across + invocations of this method. This method does not guarantee the order of the elements in + its result. +Parameters:jobGroup - (undocumented) +Returns:(undocumented) + + + + + + + +getActiveStageIds +publicint[]getActiveStageIds() +Returns an array containing the ids of all active stages. + + This method does not guarantee the order of the elements in its result. +Returns:(undocumented) + + + + + + + +getActiveJobIds +publicint[]getActiveJobIds() +Returns an array containing the ids of all active jobs. + + This method does not guarantee the order of the elements in its result. +Returns:(undocumented) + + + + + + + +getJobInfo +publicSparkJobInfogetJobInfo(intjobId) +Returns job information, or null if the job info could not be found or was garbage collected. +Parameters:jobId - (undocumented) +Returns:(undocumented) + + + + + + + +getStageInfo +publicSparkStageInfogetStageInfo(intstageId) +Returns stage information, or null if the stage info could not be found or was + garbage collected. 
+Parameters:stageId - (undocumented) +Returns:(undocumented) + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + +
[08/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html new file mode 100644 index 000..e11756b --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/MapPartitionsFunction.html @@ -0,0 +1,213 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +MapPartitionsFunction (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java.function +Interface MapPartitionsFunctionT,U + + + + + + +All Superinterfaces: +java.io.Serializable + + + +public interface MapPartitionsFunctionT,U +extends java.io.Serializable +Base interface for function used in Dataset's mapPartitions. 
+ + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +IterableU +call(java.util.IteratorTinput) + + + + + + + + + + + + + + + +Method Detail + + + + + +call +IterableUcall(java.util.IteratorTinput) + throws Exception +Throws: +Exception + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/PairFlatMapFunction.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/PairFlatMapFunction.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/PairFlatMapFunction.html new file mode 100644 index 000..435175e --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/PairFlatMapFunction.html @@ -0,0 +1,216 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +PairFlatMapFunction (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java.function +Interface PairFlatMapFunctionT,K,V + + + + + + +All Superinterfaces: +java.io.Serializable + + + +public interface PairFlatMapFunctionT,K,V +extends java.io.Serializable +A function that returns zero or more key-value pair records from each input record. The + key-value pairs are represented as scala.Tuple2 objects. 
+ + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +Iterablescala.Tuple2K,V +call(Tt) + + + + + + + + + + + + + + + +Method Detail + + + + + + + +call +Iterablescala.Tuple2K,Vcall(Tt) + throws Exception +Throws: +Exception + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + +
[42/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/printSchema.html -- diff --git a/site/docs/1.6.3/api/R/printSchema.html b/site/docs/1.6.3/api/R/printSchema.html new file mode 100644 index 000..64900a5 --- /dev/null +++ b/site/docs/1.6.3/api/R/printSchema.html @@ -0,0 +1,134 @@ + +R: Print Schema of a DataFrame + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +printSchema {SparkR}R Documentation + +Print Schema of a DataFrame + +Description + +Prints out the schema in tree format + + + +Usage + + +## S4 method for signature 'DataFrame' +printSchema(x) + + + +Arguments + + +x + +A SparkSQL DataFrame + + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; agg, +agg, +count,GroupedData-method, +summarize, summarize; +arrange, arrange, +arrange, orderBy, +orderBy; as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes; +except, except; +explain, explain; +filter, filter, +where, where; +first, first; +groupBy, groupBy, +group_by, group_by; +head; insertInto, +insertInto; intersect, +intersect; isLocal, +isLocal; join; +limit, limit; +merge, merge; +mutate, mutate, +transform, transform; +ncol; persist; +rbind, rbind, +unionAll, unionAll; +registerTempTable, 
+registerTempTable; rename, +rename, withColumnRenamed, +withColumnRenamed; +repartition; sample, +sample, sample_frac, +sample_frac; +saveAsParquetFile, +saveAsParquetFile, +write.parquet, write.parquet; +saveAsTable, saveAsTable; +saveDF, saveDF, +write.df, write.df, +write.df; selectExpr; +showDF, showDF; +show, show, +show,GroupedData-method; str; +take; unpersist; +withColumn, withColumn; +write.json, write.json; +write.text, write.text + + + +Examples + +## Not run: +##D sc - sparkR.init() +##D sqlContext - sparkRSQL.init(sc) +##D path - path/to/file.json +##D df - read.json(sqlContext, path) +##D printSchema(df) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/quarter.html -- diff --git a/site/docs/1.6.3/api/R/quarter.html b/site/docs/1.6.3/api/R/quarter.html new file mode 100644 index 000..b7b033d --- /dev/null +++ b/site/docs/1.6.3/api/R/quarter.html @@ -0,0 +1,70 @@ + +R: quarter + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +quarter {SparkR}R Documentation + +quarter + +Description + +Extracts the quarter as an integer from a given date/timestamp/string. 
+ + + +Usage + + +## S4 method for signature 'Column' +quarter(x) + +quarter(x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; months_between, +months_between; month, +month; next_day, +next_day; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: quarter(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/rand.html -- diff --git a/site/docs/1.6.3/api/R/rand.html b/site/docs/1.6.3/api/R/rand.html new file mode 100644 index 000..050f9dc --- /dev/null +++ b/site/docs/1.6.3/api/R/rand.html @@ -0,0 +1,62 @@ + +R: rand + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad();
[46/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/explain.html -- diff --git a/site/docs/1.6.3/api/R/explain.html b/site/docs/1.6.3/api/R/explain.html new file mode 100644 index 000..3fea286 --- /dev/null +++ b/site/docs/1.6.3/api/R/explain.html @@ -0,0 +1,139 @@ + +R: Explain + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +explain {SparkR}R Documentation + +Explain + +Description + +Print the logical and physical Catalyst plans to the console for debugging. + + + +Usage + + +## S4 method for signature 'DataFrame' +explain(x, extended = FALSE) + +explain(x, ...) + + + +Arguments + + +x + +A SparkSQL DataFrame + +extended + +Logical. If extended is False, explain() only prints the physical plan. + + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; agg, +agg, +count,GroupedData-method, +summarize, summarize; +arrange, arrange, +arrange, orderBy, +orderBy; as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes; +except, except; +filter, filter, +where, where; +first, first; +groupBy, groupBy, +group_by, group_by; +head; insertInto, +insertInto; intersect, +intersect; isLocal, +isLocal; join; +limit, limit; +merge, merge; +mutate, mutate, +transform, transform; +ncol; 
persist; +printSchema; rbind, +rbind, unionAll, +unionAll; registerTempTable, +registerTempTable; rename, +rename, withColumnRenamed, +withColumnRenamed; +repartition; sample, +sample, sample_frac, +sample_frac; +saveAsParquetFile, +saveAsParquetFile, +write.parquet, write.parquet; +saveAsTable, saveAsTable; +saveDF, saveDF, +write.df, write.df, +write.df; selectExpr; +showDF, showDF; +show, show, +show,GroupedData-method; str; +take; unpersist; +withColumn, withColumn; +write.json, write.json; +write.text, write.text + + + +Examples + +## Not run: +##D sc - sparkR.init() +##D sqlContext - sparkRSQL.init(sc) +##D path - path/to/file.json +##D df - read.json(sqlContext, path) +##D explain(df, TRUE) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/explode.html -- diff --git a/site/docs/1.6.3/api/R/explode.html b/site/docs/1.6.3/api/R/explode.html new file mode 100644 index 000..1277a4b --- /dev/null +++ b/site/docs/1.6.3/api/R/explode.html @@ -0,0 +1,48 @@ + +R: explode + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +explode {SparkR}R Documentation + +explode + +Description + +Creates a new row for each element in the given array or map column. 
+ + + +Usage + + +## S4 method for signature 'Column' +explode(x) + +explode(x) + + + +See Also + +Other collection_funcs: array_contains, +array_contains; size, +size; sort_array, +sort_array + + + +Examples + +## Not run: explode(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/expm1.html -- diff --git a/site/docs/1.6.3/api/R/expm1.html b/site/docs/1.6.3/api/R/expm1.html new file mode 100644 index 000..f3cedac --- /dev/null +++ b/site/docs/1.6.3/api/R/expm1.html @@ -0,0 +1,69 @@ + +R: expm1 + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +expm1 {SparkR}R Documentation + +expm1 + +Description + +Computes the exponential of the given value minus one. + + + +Usage + + +## S4 method for signature 'Column' +expm1(x) + + + +See Also + +Other math_funcs: acos; asin; +atan2; atan; +bin, bin; cbrt, +cbrt; ceil, +ceil, ceiling; +conv, conv; +corr; cosh; +cos; exp; +factorial; floor; +hex, hex; +hypot,
[44/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/last_day.html -- diff --git a/site/docs/1.6.3/api/R/last_day.html b/site/docs/1.6.3/api/R/last_day.html new file mode 100644 index 000..e371d3a --- /dev/null +++ b/site/docs/1.6.3/api/R/last_day.html @@ -0,0 +1,72 @@ + +R: last_day + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +last_day {SparkR}R Documentation + +last_day + +Description + +Given a date column, returns the last day of the month which the given date belongs to. +For example, input 2015-07-27 returns 2015-07-31 since July 31 is the last day of the +month in July 2015. + + + +Usage + + +## S4 method for signature 'Column' +last_day(x) + +last_day(x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; minute, +minute; months_between, +months_between; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: last_day(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/lead.html -- diff --git a/site/docs/1.6.3/api/R/lead.html b/site/docs/1.6.3/api/R/lead.html new file mode 100644 index 000..648e0ad --- /dev/null +++ b/site/docs/1.6.3/api/R/lead.html @@ -0,0 +1,60 @@ + +R: lead + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> 
+https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +lead {SparkR}R Documentation + +lead + +Description + +Window function: returns the value that is 'offset' rows after the current row, and +'null' if there is less than 'offset' rows after the current row. For example, +an 'offset' of one will return the next row at any given point in the window partition. + + + +Usage + + +## S4 method for signature 'characterOrColumn,numeric' +lead(x, offset, defaultValue = NULL) + +lead(x, offset, defaultValue = NULL) + + + +Details + +This is equivalent to the LEAD function in SQL. + + + +See Also + +Other window_funcs: cume_dist, +cume_dist; dense_rank, +dense_rank; lag, +lag; ntile, +ntile; percent_rank, +percent_rank; rank, +rank; row_number, +row_number + + + +Examples + +## Not run: lead(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/least.html -- diff --git a/site/docs/1.6.3/api/R/least.html b/site/docs/1.6.3/api/R/least.html new file mode 100644 index 000..6151dcf --- /dev/null +++ b/site/docs/1.6.3/api/R/least.html @@ -0,0 +1,60 @@ + +R: least + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +least {SparkR}R Documentation + +least + +Description + +Returns the least value of the list of column names, skipping null values. +This function takes at least 2 parameters. It will return null if all parameters are null. + + + +Usage + + +## S4 method for signature 'Column' +least(x, ...) + +least(x, ...) 
+ + + +See Also + +Other normal_funcs: abs; +bitwiseNOT, bitwiseNOT; +col, column, +column; expr, +expr; greatest, +greatest; ifelse; +is.nan, isnan, +isnan; lit, +lit; nanvl, +nanvl; negate, +negate; randn, +randn, randn; +rand, rand, +rand; struct, +struct; when + + + +Examples + +## Not run: least(df$c, df$d) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/length.html -- diff --git a/site/docs/1.6.3/api/R/length.html b/site/docs/1.6.3/api/R/length.html new file mode 100644 index 000..38743a6 --- /dev/null +++ b/site/docs/1.6.3/api/R/length.html @@ -0,0 +1,70 @@ + +R: length + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;>
[38/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/to_date.html -- diff --git a/site/docs/1.6.3/api/R/to_date.html b/site/docs/1.6.3/api/R/to_date.html new file mode 100644 index 000..0230386 --- /dev/null +++ b/site/docs/1.6.3/api/R/to_date.html @@ -0,0 +1,70 @@ + +R: to_date + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +to_date {SparkR}R Documentation + +to_date + +Description + +Converts the column into DateType. + + + +Usage + + +## S4 method for signature 'Column' +to_date(x) + +to_date(x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; months_between, +months_between; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: to_date(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/to_utc_timestamp.html -- diff --git a/site/docs/1.6.3/api/R/to_utc_timestamp.html b/site/docs/1.6.3/api/R/to_utc_timestamp.html new file mode 100644 index 000..c2ea7d2 --- /dev/null +++ b/site/docs/1.6.3/api/R/to_utc_timestamp.html @@ -0,0 +1,69 @@ + +R: to_utc_timestamp + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> 
+https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +to_utc_timestamp {SparkR}R Documentation + +to_utc_timestamp + +Description + +Assumes given timestamp is in given timezone and converts to UTC. + + + +Usage + + +## S4 method for signature 'Column,character' +to_utc_timestamp(y, x) + +to_utc_timestamp(y, x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; months_between, +months_between; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: to_utc_timestamp(df$t, PST) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/translate.html -- diff --git a/site/docs/1.6.3/api/R/translate.html b/site/docs/1.6.3/api/R/translate.html new file mode 100644 index 000..ce15524 --- /dev/null +++ b/site/docs/1.6.3/api/R/translate.html @@ -0,0 +1,76 @@ + +R: translate + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +translate {SparkR}R Documentation + +translate + +Description + +Translate any character in the src by a character in replaceString. +The characters in replaceString is corresponding to the characters in matchingString. +The translate will happen when any character in the string matching with the character +in the matchingString. 
+ + + +Usage + + +## S4 method for signature 'Column,character,character' +translate(x, matchingString, + replaceString) + +translate(x, matchingString, replaceString) + + + +See Also + +Other string_funcs: ascii, +ascii; base64, +base64; concat_ws, +concat_ws; concat, +concat; decode, +decode; encode, +encode; format_number, +format_number; format_string, +format_string; initcap, +initcap; instr, +instr; length; +levenshtein, levenshtein; +locate, locate; +lower, lower; +lpad, lpad; +ltrim, ltrim; +regexp_extract, +regexp_extract; +regexp_replace, +regexp_replace; reverse, +reverse; rpad, +rpad; rtrim, +rtrim; soundex, +soundex; substring_index, +substring_index; trim, +trim; unbase64, +unbase64; upper, +upper + + + +Examples + +## Not run: translate(df$c, rnlt, 123) + + +
[28/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/CleanCheckpoint.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/CleanCheckpoint.html b/site/docs/1.6.3/api/java/org/apache/spark/CleanCheckpoint.html new file mode 100644 index 000..eaef884 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/CleanCheckpoint.html @@ -0,0 +1,274 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +CleanCheckpoint (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class CleanCheckpoint + + + +Object + + +org.apache.spark.CleanCheckpoint + + + + + + + +All Implemented Interfaces: +java.io.Serializable, CleanupTask, scala.Equals, scala.Product + + + +public class CleanCheckpoint +extends Object +implements CleanupTask, scala.Product, scala.Serializable +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +CleanCheckpoint(intrddId) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +rddId() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +CleanCheckpoint +publicCleanCheckpoint(intrddId) + + + + + + + + + +Method Detail + + + + + +rddId +publicintrddId() + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + 
+Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/CleanRDD.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/CleanRDD.html b/site/docs/1.6.3/api/java/org/apache/spark/CleanRDD.html new file mode 100644 index 000..f146e41 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/CleanRDD.html @@ -0,0 +1,274 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +CleanRDD (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class CleanRDD + + + +Object + + +org.apache.spark.CleanRDD + + + + + + + +All Implemented Interfaces: +java.io.Serializable, CleanupTask, scala.Equals, scala.Product + + + +public class CleanRDD +extends Object +implements CleanupTask, scala.Product, scala.Serializable +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +CleanRDD(intrddId) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +rddId() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +CleanRDD +publicCleanRDD(intrddId) + + + + + + + + + +Method Detail + +
[21/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.html b/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.html new file mode 100644 index 000..786b34c --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.html @@ -0,0 +1,2961 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SparkContext (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class SparkContext + + + +Object + + +org.apache.spark.SparkContext + + + + + + + +All Implemented Interfaces: +Logging + + + +public class SparkContext +extends Object +implements Logging +Main entry point for Spark functionality. A SparkContext represents the connection to a Spark + cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. + + Only one SparkContext may be active per JVM. You must stop() the active SparkContext before + creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + + param: config a Spark Config object describing the application configuration. Any settings in + this config overrides the default configs as well as system properties. 
+ + + + + + + + + + + +Nested Class Summary + +Nested Classes + +Modifier and Type +Class and Description + + +static class +SparkContext.DoubleAccumulatorParam$ + + +static class +SparkContext.FloatAccumulatorParam$ + + +static class +SparkContext.IntAccumulatorParam$ + + +static class +SparkContext.LongAccumulatorParam$ + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkContext() +Create a SparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit). + + + +SparkContext(SparkConfconfig) + + +SparkContext(SparkConfconfig, +scala.collection.MapString,scala.collection.SetSplitInfopreferredNodeLocationData) +:: DeveloperApi :: + Alternative constructor for setting preferred locations where Spark will create executors. + + + +SparkContext(Stringmaster, +StringappName, +SparkConfconf) +Alternative constructor that allows setting common Spark properties directly + + + +SparkContext(Stringmaster, +StringappName, +StringsparkHome, +scala.collection.SeqStringjars, +scala.collection.MapString,Stringenvironment, +scala.collection.MapString,scala.collection.SetSplitInfopreferredNodeLocationData) +Alternative constructor that allows setting common Spark properties directly + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +R,TAccumulableR,T +accumulable(RinitialValue, + AccumulableParamR,Tparam) +Create an Accumulable shared variable, to which tasks can add values + with +=. + + + +R,TAccumulableR,T +accumulable(RinitialValue, + Stringname, + AccumulableParamR,Tparam) +Create an Accumulable shared variable, with a name for display in the + Spark UI. + + + +R,TAccumulableR,T +accumulableCollection(RinitialValue, + scala.Function1R,scala.collection.generic.GrowableTevidence$9, + scala.reflect.ClassTagRevidence$10) +Create an accumulator from a "mutable collection" type. 
+ + + +TAccumulatorT +accumulator(TinitialValue, + AccumulatorParamTparam) +Create an Accumulator variable of a given type, which tasks can "add" + values to using the += method. + + + +TAccumulatorT +accumulator(TinitialValue, + Stringname, + AccumulatorParamTparam) +Create an Accumulator variable of a given type, with a name for display + in the Spark UI. + + + +scala.collection.mutable.HashMapString,Object +addedFiles() + + +scala.collection.mutable.HashMapString,Object +addedJars() + + +void +addFile(Stringpath) +Add a file to be downloaded with this Spark job on every node. + + + +void +addFile(Stringpath, + booleanrecursive) +Add a file to be downloaded with this Spark job on every node. + + + +void +addJar(Stringpath) +Adds a JAR dependency for all tasks to be executed on this SparkContext in the future. + + + +void
[30/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/Accumulable.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/Accumulable.html b/site/docs/1.6.3/api/java/org/apache/spark/Accumulable.html new file mode 100644 index 000..bea826e --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/Accumulable.html @@ -0,0 +1,442 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Accumulable (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class AccumulableR,T + + + +Object + + +org.apache.spark.AccumulableR,T + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + +Direct Known Subclasses: +Accumulator + + + +public class AccumulableR,T +extends Object +implements java.io.Serializable +A data type that can be accumulated, ie has an commutative and associative "add" operation, + but where the result type, R, may be different from the element type being added, T. + + You must define how to add data, and how to merge two of these together. For some data types, + such as a counter, these might be the same operation. In that case, you can use the simpler + Accumulator. They won't always be the same, though -- e.g., imagine you are + accumulating a set. You will add items to the set, and you will union two sets together. + + param: initialValue initial value of accumulator + param: param helper object defining how to add elements of type R and T + param: name human-readable name for use in Spark's web UI + param: internal if this Accumulable is internal. Internal Accumulables will be reported + to the driver via heartbeats. 
For internal Accumulables, R must be + thread safe so that they can be reported correctly. +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Accumulable(RinitialValue, + AccumulableParamR,Tparam) + + +Accumulable(RinitialValue, + AccumulableParamR,Tparam, + scala.OptionStringname) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +void +add(Tterm) +Add more data to this accumulator / accumulable + + + +long +id() + + +R +localValue() +Get the current value of this accumulator from within a task. + + + +void +merge(Rterm) +Merge two accumulable objects together + + + +scala.OptionString +name() + + +void +setValue(RnewValue) +Set the accumulator's value; only allowed on master + + + +String +toString() + + +R +value() +Access the accumulator's current value; only allowed on master. + + + +R +zero() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + + + +Accumulable +publicAccumulable(RinitialValue, + AccumulableParamR,Tparam, + scala.OptionStringname) + + + + + + + + + +Accumulable +publicAccumulable(RinitialValue, + AccumulableParamR,Tparam) + + + + + + + + + +Method Detail + + + + + +name +publicscala.OptionStringname() + + + + + + + +id +publiclongid() + + + + + + + +zero +publicRzero() + + + + + + + + + +add +publicvoidadd(Tterm) +Add more data to this accumulator / accumulable +Parameters:term - the data to add + + + + + + + + + +merge +publicvoidmerge(Rterm) +Merge two accumulable objects together + + Normally, a user will not want to use this version, but will instead call add. +Parameters:term - the other R that will get merged with this + + + + + + + +value +publicRvalue() +Access the accumulator's current value; only allowed on master. 
+Returns:(undocumented) + + + + + + + +localValue +publicRlocalValue() +Get the current value of this accumulator from within a task. + + This is NOT the global value of the accumulator. To get the global value after a + completed operation on the dataset, call value. + + The typical use of this method is to directly mutate the local value, eg., to add + an element to a Set. +Returns:(undocumented) + + + + + + + + + +setValue +publicvoidsetValue(RnewValue) +Set the accumulator's value; only allowed on master +Parameters:newValue - (undocumented) + + + + + + + +toString +publicStringtoString() +
[11/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkContext.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkContext.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkContext.html new file mode 100644 index 000..d566d27 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaSparkContext.html @@ -0,0 +1,2073 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +JavaSparkContext (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Class JavaSparkContext + + + +Object + + +org.apache.spark.api.java.JavaSparkContext + + + + + + + +All Implemented Interfaces: +java.io.Closeable, AutoCloseable + + + +public class JavaSparkContext +extends Object +implements java.io.Closeable +A Java-friendly version of SparkContext that returns + JavaRDDs and works with Java collections instead of Scala ones. + + Only one SparkContext may be active per JVM. You must stop() the active SparkContext before + creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaSparkContext() +Create a JavaSparkContext that loads settings from system properties (for instance, when + launching with ./bin/spark-submit). 
+ + + +JavaSparkContext(SparkConfconf) + + +JavaSparkContext(SparkContextsc) + + +JavaSparkContext(Stringmaster, +StringappName) + + +JavaSparkContext(Stringmaster, +StringappName, +SparkConfconf) + + +JavaSparkContext(Stringmaster, +StringappName, +StringsparkHome, +StringjarFile) + + +JavaSparkContext(Stringmaster, +StringappName, +StringsparkHome, +String[]jars) + + +JavaSparkContext(Stringmaster, +StringappName, +StringsparkHome, +String[]jars, + java.util.MapString,Stringenvironment) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +T,RAccumulableT,R +accumulable(TinitialValue, + AccumulableParamT,Rparam) +Create an Accumulable shared variable of the given type, to which tasks + can "add" values with add. + + + +T,RAccumulableT,R +accumulable(TinitialValue, + Stringname, + AccumulableParamT,Rparam) +Create an Accumulable shared variable of the given type, to which tasks + can "add" values with add. + + + +AccumulatorDouble +accumulator(doubleinitialValue) +Create an Accumulator double variable, which tasks can "add" values + to using the add method. + + + +AccumulatorDouble +accumulator(doubleinitialValue, + Stringname) +Create an Accumulator double variable, which tasks can "add" values + to using the add method. + + + +AccumulatorInteger +accumulator(intinitialValue) +Create an Accumulator integer variable, which tasks can "add" values + to using the add method. + + + +AccumulatorInteger +accumulator(intinitialValue, + Stringname) +Create an Accumulator integer variable, which tasks can "add" values + to using the add method. + + + +TAccumulatorT +accumulator(TinitialValue, + AccumulatorParamTaccumulatorParam) +Create an Accumulator variable of a given type, which tasks can "add" + values to using the add method. + + + +TAccumulatorT +accumulator(TinitialValue, + Stringname, + AccumulatorParamTaccumulatorParam) +Create an Accumulator variable of a given type, which tasks can "add" + values to using the add method. 
+ + + +void +addFile(Stringpath) +Add a file to be downloaded with this Spark job on every node. + + + +void +addJar(Stringpath) +Adds a JAR dependency for all tasks to be executed on this SparkContext in the future. + + + +String +appName() + + +JavaPairRDDString,PortableDataStream +binaryFiles(Stringpath) +Read a directory of binary files from HDFS, a local file system (available on all nodes), + or any Hadoop-supported file system URI as a byte array. + + + +JavaPairRDDString,PortableDataStream +binaryFiles(Stringpath, + intminPartitions) +Read a
[12/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDDLike.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDDLike.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDDLike.html new file mode 100644 index 000..de4b6d8 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/JavaRDDLike.html @@ -0,0 +1,1721 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +JavaRDDLike (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Interface JavaRDDLikeT,This extends JavaRDDLikeT,This + + + + + + +All Superinterfaces: +java.io.Serializable + + +All Known Implementing Classes: +JavaDoubleRDD, JavaHadoopRDD, JavaNewHadoopRDD, JavaPairRDD, JavaRDD + + + +public interface JavaRDDLikeT,This extends JavaRDDLikeT,This +extends scala.Serializable +Defines operations common to several Java RDD implementations. + Note that this trait is not intended to be implemented by user code. + + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +UU +aggregate(UzeroValue, + Function2U,T,UseqOp, + Function2U,U,UcombOp) +Aggregate the elements of each partition, and then the results for all the partitions, using + given combine functions and a neutral "zero value". + + + +UJavaPairRDDT,U +cartesian(JavaRDDLikeU,?other) +Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of + elements (a, b) where a is in this and b is in other. + + + +void +checkpoint() +Mark this RDD for checkpointing. 
+ + + +scala.reflect.ClassTagT +classTag() + + +java.util.ListT +collect() +Return an array that contains all of the elements in this RDD. + + + +JavaFutureActionjava.util.ListT +collectAsync() +The asynchronous version of collect, which returns a future for + retrieving an array containing all of the elements in this RDD. + + + +java.util.ListT[] +collectPartitions(int[]partitionIds) +Return an array that contains all of the elements in a specific partition of this RDD. + + + +SparkContext +context() +The SparkContext that this RDD was created on. + + + +long +count() +Return the number of elements in the RDD. + + + +PartialResultBoundedDouble +countApprox(longtimeout) +Approximate version of count() that returns a potentially incomplete result + within a timeout, even if not all tasks have finished. + + + +PartialResultBoundedDouble +countApprox(longtimeout, + doubleconfidence) +Approximate version of count() that returns a potentially incomplete result + within a timeout, even if not all tasks have finished. + + + +long +countApproxDistinct(doublerelativeSD) +Return approximate number of distinct elements in the RDD. + + + +JavaFutureActionLong +countAsync() +The asynchronous version of count, which returns a + future for counting the number of elements in this RDD. + + + +java.util.MapT,Long +countByValue() +Return the count of each unique value in this RDD as a map of (value, count) pairs. + + + +PartialResultjava.util.MapT,BoundedDouble +countByValueApprox(longtimeout) +(Experimental) Approximate version of countByValue(). + + + +PartialResultjava.util.MapT,BoundedDouble +countByValueApprox(longtimeout, + doubleconfidence) +(Experimental) Approximate version of countByValue(). + + + +T +first() +Return the first element in this RDD. + + + +UJavaRDDU +flatMap(FlatMapFunctionT,Uf) +Return a new RDD by first applying a function to all elements of this + RDD, and then flattening the results. 
+ + + +JavaDoubleRDD +flatMapToDouble(DoubleFlatMapFunctionTf) +Return a new RDD by first applying a function to all elements of this + RDD, and then flattening the results. + + + +K2,V2JavaPairRDDK2,V2 +flatMapToPair(PairFlatMapFunctionT,K2,V2f) +Return a new RDD by first applying a function to all elements of this + RDD, and then flattening the results. + + + +T +fold(TzeroValue, +Function2T,T,Tf) +Aggregate the elements of each partition, and then the results for all the partitions, using a + given associative and commutative function and a neutral "zero value". + + + +void +foreach(VoidFunctionTf) +Applies a function f to all elements of this RDD. + + +
[20/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SparkEnv.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SparkEnv.html b/site/docs/1.6.3/api/java/org/apache/spark/SparkEnv.html new file mode 100644 index 000..2108dc3 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SparkEnv.html @@ -0,0 +1,613 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SparkEnv (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class SparkEnv + + + +Object + + +org.apache.spark.SparkEnv + + + + + + + +All Implemented Interfaces: +Logging + + + +public class SparkEnv +extends Object +implements Logging +:: DeveloperApi :: + Holds all the runtime environment objects for a running Spark instance (either master or worker), + including the serializer, Akka actor system, block manager, map output tracker, etc. Currently + Spark code finds the SparkEnv through a global variable, so all the threads can access the same + SparkEnv. It can be accessed by SparkEnv.get (e.g. after creating a SparkContext). + + NOTE: This is not intended for external use. This is exposed for Shark and may be made private + in a future release. 
+ + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkEnv(StringexecutorId, +org.apache.spark.rpc.RpcEnvrpcEnv, +akka.actor.ActorSystem_actorSystem, +Serializerserializer, +SerializerclosureSerializer, +org.apache.spark.CacheManagercacheManager, +org.apache.spark.MapOutputTrackermapOutputTracker, +org.apache.spark.shuffle.ShuffleManagershuffleManager, +org.apache.spark.broadcast.BroadcastManagerbroadcastManager, + org.apache.spark.network.BlockTransferServiceblockTransferService, +org.apache.spark.storage.BlockManagerblockManager, +org.apache.spark.SecurityManagersecurityManager, +StringsparkFilesDir, +org.apache.spark.metrics.MetricsSystemmetricsSystem, +org.apache.spark.memory.MemoryManagermemoryManager, + org.apache.spark.scheduler.OutputCommitCoordinatoroutputCommitCoordinator, +SparkConfconf) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +akka.actor.ActorSystem +actorSystem() + + +org.apache.spark.storage.BlockManager +blockManager() + + +org.apache.spark.network.BlockTransferService +blockTransferService() + + +org.apache.spark.broadcast.BroadcastManager +broadcastManager() + + +org.apache.spark.CacheManager +cacheManager() + + +Serializer +closureSerializer() + + +SparkConf +conf() + + +static String +driverActorSystemName() + + +static String +executorActorSystemName() + + +String +executorId() + + +static SparkEnv +get() +Returns the SparkEnv. + + + +static SparkEnv +getThreadLocal() +Returns the ThreadLocal SparkEnv. 
+ + + +java.util.concurrent.ConcurrentMapString,Object +hadoopJobMetadata() + + +boolean +isStopped() + + +org.apache.spark.MapOutputTracker +mapOutputTracker() + + +org.apache.spark.memory.MemoryManager +memoryManager() + + +org.apache.spark.metrics.MetricsSystem +metricsSystem() + + +org.apache.spark.scheduler.OutputCommitCoordinator +outputCommitCoordinator() + + +org.apache.spark.rpc.RpcEnv +rpcEnv() + + +org.apache.spark.SecurityManager +securityManager() + + +Serializer +serializer() + + +static void +set(SparkEnve) + + +org.apache.spark.shuffle.ShuffleManager +shuffleManager() + + +String +sparkFilesDir() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.Logging +initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning + + + + + + + + + + + + + + +Constructor Detail + + + + + +SparkEnv +publicSparkEnv(StringexecutorId, +org.apache.spark.rpc.RpcEnvrpcEnv, +akka.actor.ActorSystem_actorSystem, +Serializerserializer, +SerializerclosureSerializer, +org.apache.spark.CacheManagercacheManager, +org.apache.spark.MapOutputTrackermapOutputTracker, +
[43/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/minute.html -- diff --git a/site/docs/1.6.3/api/R/minute.html b/site/docs/1.6.3/api/R/minute.html new file mode 100644 index 000..8164c7f --- /dev/null +++ b/site/docs/1.6.3/api/R/minute.html @@ -0,0 +1,70 @@ + +R: minute + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +minute {SparkR}R Documentation + +minute + +Description + +Extracts the minutes as an integer from a given date/timestamp/string. + + + +Usage + + +## S4 method for signature 'Column' +minute(x) + +minute(x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; months_between, +months_between; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: minute(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/month.html -- diff --git a/site/docs/1.6.3/api/R/month.html b/site/docs/1.6.3/api/R/month.html new file mode 100644 index 000..fa2f632 --- /dev/null +++ b/site/docs/1.6.3/api/R/month.html @@ -0,0 +1,70 @@ + +R: month + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> 
+https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +month {SparkR}R Documentation + +month + +Description + +Extracts the month as an integer from a given date/timestamp/string. + + + +Usage + + +## S4 method for signature 'Column' +month(x) + +month(x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; months_between, +months_between; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: month(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/months_between.html -- diff --git a/site/docs/1.6.3/api/R/months_between.html b/site/docs/1.6.3/api/R/months_between.html new file mode 100644 index 000..901e093 --- /dev/null +++ b/site/docs/1.6.3/api/R/months_between.html @@ -0,0 +1,70 @@ + +R: months_between + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +months_between {SparkR}R Documentation + +months_between + +Description + +Returns number of months between dates 'date1' and 'date2'. 
+ + + +Usage + + +## S4 method for signature 'Column' +months_between(y, x) + +months_between(y, x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: months_between(df$c, x) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/mutate.html -- diff --git a/site/docs/1.6.3/api/R/mutate.html b/site/docs/1.6.3/api/R/mutate.html new file mode
[36/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/allclasses-frame.html -- diff --git a/site/docs/1.6.3/api/java/allclasses-frame.html b/site/docs/1.6.3/api/java/allclasses-frame.html new file mode 100644 index 000..3ed8155 --- /dev/null +++ b/site/docs/1.6.3/api/java/allclasses-frame.html @@ -0,0 +1,791 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +All Classes (Spark 1.6.3 JavaDoc) + + + + +All Classes + + +AbsoluteError +Accumulable +AccumulableInfo +AccumulableInfo +AccumulableParam +Accumulator +AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$ +AccumulatorParam.FloatAccumulatorParam$ +AccumulatorParam.IntAccumulatorParam$ +AccumulatorParam.LongAccumulatorParam$ +ActorHelper +ActorSupervisorStrategy +AFTAggregator +AFTCostFun +AFTSurvivalRegression +AFTSurvivalRegressionModel +AggregatedDialect +AggregatingEdgeContext +Aggregator +Aggregator +Algo +AlphaComponent +ALS +ALS +ALS.Rating +ALS.Rating$ +ALSModel +AnalysisException +And +ApplicationAttemptInfo +ApplicationInfo +ApplicationStatus +ArrayType +AskPermissionToCommitOutput +AssociationRules +AssociationRules.Rule +AsyncRDDActions +Attribute +AttributeGroup +AttributeType +BaseRelation +BaseRRDD +BatchInfo +BernoulliCellSampler +BernoulliSampler +Binarizer +BinaryAttribute +BinaryClassificationEvaluator +BinaryClassificationMetrics +BinaryLogisticRegressionSummary +BinaryLogisticRegressionTrainingSummary +BinarySample +BinaryType +BisectingKMeans +BisectingKMeansModel +BlockId +BlockManagerId +BlockMatrix +BlockNotFoundException +BlockStatus +BlockUpdatedInfo +BooleanParam +BooleanType +BoostingStrategy +BoundedDouble +Broadcast +BroadcastBlockId +BroadcastFactory +Broker +Bucketizer +BufferReleasingInputStream +ByteType +CalendarIntervalType +CatalystScan +CategoricalSplit +ChiSqSelector +ChiSqSelector +ChiSqSelectorModel +ChiSqSelectorModel +ChiSqTestResult +ClassificationModel +ClassificationModel +Classifier +CleanAccum +CleanBroadcast 
+CleanCheckpoint +CleanRDD +CleanShuffle +CleanupTask +CleanupTaskWeakReference +CoGroupedRDD +CoGroupFunction +Column +ColumnName +ColumnPruner +ComplexFutureAction +CompressionCodec +ConnectedComponents +ConstantInputDStream +ContinuousSplit +CoordinateMatrix +CountVectorizer +CountVectorizerModel +CreatableRelationProvider +CrossValidator +CrossValidatorModel +DataFrame +DataFrameHolder +DataFrameNaFunctions +DataFrameReader +DataFrameStatFunctions +DataFrameWriter +Dataset +DatasetHolder +DataSourceRegister +DataType +DataTypes +DataValidators +DateType +DB2Dialect +DCT +Decimal +DecimalType +DecisionTree +DecisionTreeClassificationModel +DecisionTreeClassifier +DecisionTreeModel +DecisionTreeRegressionModel +DecisionTreeRegressor +DefaultSource +DenseMatrix +DenseVector +Dependency +DerbyDialect +DeserializationStream +DeveloperApi +DistributedLDAModel +DistributedLDAModel +DistributedMatrix +DoubleArrayParam +DoubleFlatMapFunction +DoubleFunction +DoubleParam +DoubleRDDFunctions +DoubleType +DStream +DummySerializerInstance +Duration +Durations +Edge +EdgeActiveness +EdgeContext +EdgeDirection +EdgeRDD +EdgeRDDImpl +EdgeTriplet +ElementwiseProduct +ElementwiseProduct +EMLDAOptimizer +Encoder +Encoders +Entropy +EnumUtil +EnvironmentListener +EqualNullSafe +EqualTo +Estimator +Evaluator +ExceptionFailure +ExecutionListenerManager +ExecutorInfo +ExecutorLostFailure +ExecutorRegistered +ExecutorRemoved +ExecutorsListener +ExecutorStageSummary +ExecutorSummary +ExpectationSum +Experimental +ExperimentalMethods +ExponentialGenerator +FeatureType +FetchFailed +Filter +FilterFunction +FlatMapFunction +FlatMapFunction2 +FlatMapGroupsFunction +FloatParam +FloatType +FlumeUtils +ForeachFunction +ForeachPartitionFunction +FPGrowth +FPGrowth.FreqItemset +FPGrowthModel +Function +Function0 +Function2 +Function3 +Function4 +functions +FutureAction +GammaGenerator +GaussianMixture +GaussianMixtureModel +GBTClassificationModel +GBTClassifier +GBTRegressionModel +GBTRegressor 
+GeneralizedLinearAlgorithm +GeneralizedLinearModel +Gini +Gradient +GradientBoostedTrees +GradientBoostedTreesModel +GradientDescent +Graph +GraphGenerators +GraphImpl +GraphKryoRegistrator +GraphLoader +GraphOps +GraphXUtils +GreaterThan +GreaterThanOrEqual +GroupedData +GroupedDataset +HadoopFsRelation +HadoopFsRelation.FakeFileStatus +HadoopFsRelation.FakeFileStatus$ +HadoopFsRelationProvider +HadoopRDD +HashingTF +HashingTF +HashPartitioner +HasOffsetRanges +HingeGradient +HiveContext +HttpBroadcastFactory +Identifiable +IDF +IDF +IDF.DocumentFrequencyAggregator +IDFModel +IDFModel +Impurity +In +IndexedRow +IndexedRowMatrix +IndexToString +InformationGainStats +InnerClosureFinder +InputDStream +InputFormatInfo +InputMetricDistributions +InputMetrics +InsertableRelation +IntArrayParam +IntegerType +Interaction +InternalNode +InterruptibleIterator +IntParam +IsNotNull +IsNull +IsotonicRegression +IsotonicRegression
[49/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/agg.html -- diff --git a/site/docs/1.6.3/api/R/agg.html b/site/docs/1.6.3/api/R/agg.html new file mode 100644 index 000..3994971 --- /dev/null +++ b/site/docs/1.6.3/api/R/agg.html @@ -0,0 +1,150 @@ + +R: Summarize data across columns + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +agg {SparkR}R Documentation + +Summarize data across columns + +Description + +Compute aggregates by specifying a list of columns + +Count the number of rows for each group. +The resulting DataFrame will also contain the grouping columns. + + + +Usage + + +## S4 method for signature 'DataFrame' +agg(x, ...) + +## S4 method for signature 'DataFrame' +summarize(x, ...) + +agg(x, ...) + +summarize(x, ...) 
+ +## S4 method for signature 'GroupedData' +count(x) + + + +Arguments + + +x + +a DataFrame + +x + +a GroupedData + + + + +Value + +a DataFrame + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; arrange, +arrange, arrange, +orderBy, orderBy; +as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes; +except, except; +explain, explain; +filter, filter, +where, where; +first, first; +groupBy, groupBy, +group_by, group_by; +head; insertInto, +insertInto; intersect, +intersect; isLocal, +isLocal; join; +limit, limit; +merge, merge; +mutate, mutate, +transform, transform; +ncol; persist; +printSchema; rbind, +rbind, unionAll, +unionAll; registerTempTable, +registerTempTable; rename, +rename, withColumnRenamed, +withColumnRenamed; +repartition; sample, +sample, sample_frac, +sample_frac; +saveAsParquetFile, +saveAsParquetFile, +write.parquet, write.parquet; +saveAsTable, saveAsTable; +saveDF, saveDF, +write.df, write.df, +write.df; selectExpr; +showDF, showDF; +show, show, +show,GroupedData-method; str; +take; unpersist; +withColumn, withColumn; +write.json, write.json; +write.text, write.text + + + +Examples + +## Not run: +##D count(groupBy(df, name)) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/alias.html -- diff --git a/site/docs/1.6.3/api/R/alias.html b/site/docs/1.6.3/api/R/alias.html new file mode 100644 index 000..633f96c 
--- /dev/null +++ b/site/docs/1.6.3/api/R/alias.html @@ -0,0 +1,33 @@ + +R: alias + + + + +alias {SparkR}R Documentation + +alias + +Description + +Set a new name for a column + + + +Usage + + +## S4 method for signature 'Column' +alias(object, data) + + + +See Also + +Other colum_func: between; +cast; otherwise; +substr + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/approxCountDistinct.html -- diff --git a/site/docs/1.6.3/api/R/approxCountDistinct.html b/site/docs/1.6.3/api/R/approxCountDistinct.html new file mode 100644 index 000..92cdc44 --- /dev/null +++ b/site/docs/1.6.3/api/R/approxCountDistinct.html @@ -0,0 +1,97 @@ + +R: approxCountDistinct + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +approxCountDistinct {SparkR}R Documentation + +approxCountDistinct + +Description + +Aggregate function: returns the approximate number of distinct items in a group. + +Approx Count Distinct + + + +Usage + + +## S4 method for signature 'Column' +approxCountDistinct(x, rsd = 0.05) + +## S4 method for signature 'Column' +approxCountDistinct(x, rsd = 0.05) + +approxCountDistinct(x, ...) + + + +Value + +the approximate number of distinct items in a group. + + + +See Also + +Other agg_funcs: agg, +summarize; avg, +avg; countDistinct, +countDistinct, n_distinct, +n_distinct; count, +n, n; first, +first; kurtosis, +kurtosis; last, +last; max; +mean; min; sd, +sd, stddev, +stddev; skewness, +skewness;
[23/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SimpleFutureAction.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SimpleFutureAction.html b/site/docs/1.6.3/api/java/org/apache/spark/SimpleFutureAction.html new file mode 100644 index 000..32c985d --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SimpleFutureAction.html @@ -0,0 +1,440 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SimpleFutureAction (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class SimpleFutureActionT + + + +Object + + +org.apache.spark.SimpleFutureActionT + + + + + + + +All Implemented Interfaces: +FutureActionT, scala.concurrent.AwaitableT, scala.concurrent.FutureT + + + +public class SimpleFutureActionT +extends Object +implements FutureActionT +A FutureAction holding the result of an action that triggers a single job. Examples include + count, collect, reduce. + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interfacescala.concurrent.Future +scala.concurrent.Future.InternalCallbackExecutor$ + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +void +cancel() +Cancels the execution of this action. + + + +boolean +isCancelled() +Returns whether the action has been cancelled. + + + +boolean +isCompleted() +Returns whether the action has already been completed with a value or an exception. + + + +scala.collection.SeqObject +jobIds() +Returns the job IDs run by the underlying async operation. 
+ + + +Uvoid +onComplete(scala.Function1scala.util.TryT,Ufunc, + scala.concurrent.ExecutionContextexecutor) +When this action is completed, either through an exception, or a value, applies the provided + function. + + + +SimpleFutureActionT +ready(scala.concurrent.duration.DurationatMost, + scala.concurrent.CanAwaitpermit) +Blocks until this action completes. + + + +T +result(scala.concurrent.duration.DurationatMost, + scala.concurrent.CanAwaitpermit) +Awaits and returns the result (of type T) of this action. + + + +scala.Optionscala.util.TryT +value() +The value of this Future. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.FutureAction +get + + + + + +Methods inherited from interfacescala.concurrent.Future +andThen, collect, failed, fallbackTo, filter, flatMap, foreach, map, mapTo, onFailure, onSuccess, recover, recoverWith, transform, withFilter, zip + + + + + + + + + + + + + + +Method Detail + + + + + +cancel +publicvoidcancel() +Description copied from interface:FutureAction +Cancels the execution of this action. + +Specified by: +cancelin interfaceFutureActionT + + + + + + + + +ready +publicSimpleFutureActionTready(scala.concurrent.duration.DurationatMost, + scala.concurrent.CanAwaitpermit) +Description copied from interface:FutureAction +Blocks until this action completes. + +Specified by: +readyin interfaceFutureActionT +Specified by: +readyin interfacescala.concurrent.AwaitableT +Parameters:atMost - maximum wait time, which may be negative (no waiting is done), Duration.Inf + for unbounded waiting, or a finite positive durationpermit - (undocumented) +Returns:this FutureAction + + + + + + + +result +publicTresult(scala.concurrent.duration.DurationatMost, + scala.concurrent.CanAwaitpermit) +Description copied from interface:FutureAction +Awaits and returns the result (of type T) of this action. 
+ +Specified by: +resultin interfaceFutureActionT +Specified by: +resultin interfacescala.concurrent.AwaitableT +Parameters:atMost - maximum wait time, which may be negative (no waiting is done), Duration.Inf + for unbounded waiting, or a finite positive durationpermit - (undocumented) +Returns:the result value if the action is completed within the specific maximum wait time + + + + + + + +onComplete +publicUvoidonComplete(scala.Function1scala.util.TryT,Ufunc, + scala.concurrent.ExecutionContextexecutor) +Description copied from interface:FutureAction +When this action is completed, either through
[05/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/broadcast/Broadcast.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/broadcast/Broadcast.html b/site/docs/1.6.3/api/java/org/apache/spark/broadcast/Broadcast.html new file mode 100644 index 000..7256968 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/broadcast/Broadcast.html @@ -0,0 +1,381 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Broadcast (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.broadcast +Class BroadcastT + + + +Object + + +org.apache.spark.broadcast.BroadcastT + + + + + + + +All Implemented Interfaces: +java.io.Serializable, Logging + + + +public abstract class BroadcastT +extends Object +implements java.io.Serializable, Logging +A broadcast variable. Broadcast variables allow the programmer to keep a read-only variable + cached on each machine rather than shipping a copy of it with tasks. They can be used, for + example, to give every node a copy of a large input dataset in an efficient manner. Spark also + attempts to distribute broadcast variables using efficient broadcast algorithms to reduce + communication cost. + + Broadcast variables are created from a variable v by calling + SparkContext.broadcast(T, scala.reflect.ClassTagT). + The broadcast variable is a wrapper around v, and its value can be accessed by calling the + value method. 
The interpreter session below shows this: + + + scala val broadcastVar = sc.broadcast(Array(1, 2, 3)) + broadcastVar: org.apache.spark.broadcast.Broadcast[Array[Int} = Broadcast(0) + + scala broadcastVar.value + res0: Array[Int] = Array(1, 2, 3) + + + After the broadcast variable is created, it should be used instead of the value v in any + functions run on the cluster so that v is not shipped to the nodes more than once. + In addition, the object v should not be modified after it is broadcast in order to ensure + that all nodes get the same value of the broadcast variable (e.g. if the variable is shipped + to a new node later). + + param: id A unique identifier for the broadcast variable. +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Broadcast(longid, + scala.reflect.ClassTagTevidence$1) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +void +destroy() +Destroy all data and metadata related to this broadcast variable. + + + +long +id() + + +String +toString() + + +void +unpersist() +Asynchronously delete cached copies of this broadcast on the executors. + + + +void +unpersist(booleanblocking) +Delete cached copies of this broadcast on the executors. + + + +T +value() +Get the broadcasted value. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.Logging +initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning + + + + + + + + + + + + + + +Constructor Detail + + + + + +Broadcast +publicBroadcast(longid, + scala.reflect.ClassTagTevidence$1) + + + + + + + + + +Method Detail + + + + + +id +publiclongid() + + + + + + + +value +publicTvalue() +Get the broadcasted value. 
+ + + + + + + +unpersist +publicvoidunpersist() +Asynchronously delete cached copies of this broadcast on the executors. + If the broadcast is used after this is called, it will need to be re-sent to each executor. + + + + + + + +unpersist +publicvoidunpersist(booleanblocking) +Delete cached copies of this broadcast on the executors. If the broadcast is used after + this is called, it will need to be re-sent to each executor. +Parameters:blocking - Whether to block until unpersisting has completed + + + + + + + +destroy +publicvoiddestroy() +Destroy all data and metadata related to this broadcast variable. Use this with caution; + once a broadcast variable has been destroyed, it cannot be used again. + This method blocks until destroy has completed + + + + + + + +toString +publicStringtoString() + +Overrides: +toStringin
[26/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/HashPartitioner.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/HashPartitioner.html b/site/docs/1.6.3/api/java/org/apache/spark/HashPartitioner.html new file mode 100644 index 000..ddde16c --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/HashPartitioner.html @@ -0,0 +1,332 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +HashPartitioner (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class HashPartitioner + + + +Object + + +org.apache.spark.Partitioner + + +org.apache.spark.HashPartitioner + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + + +public class HashPartitioner +extends Partitioner +A Partitioner that implements hash-based partitioning using + Java's Object.hashCode. + + Java arrays have hashCodes that are based on the arrays' identities rather than their contents, + so attempting to partition an RDD[Array[_} or RDD[(Array[_], _)] using a HashPartitioner will + produce an unexpected or incorrect result. 
+See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +HashPartitioner(intpartitions) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +boolean +equals(Objectother) + + +int +getPartition(Objectkey) + + +int +hashCode() + + +int +numPartitions() + + + + + + +Methods inherited from classorg.apache.spark.Partitioner +defaultPartitioner + + + + + +Methods inherited from classObject +getClass, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + +HashPartitioner +publicHashPartitioner(intpartitions) + + + + + + + + + +Method Detail + + + + + +numPartitions +publicintnumPartitions() + +Specified by: +numPartitionsin classPartitioner + + + + + + + + +getPartition +publicintgetPartition(Objectkey) + +Specified by: +getPartitionin classPartitioner + + + + + + + + +equals +publicbooleanequals(Objectother) + +Overrides: +equalsin classObject + + + + + + + + +hashCode +publicinthashCode() + +Overrides: +hashCodein classObject + + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/InterruptibleIterator.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/InterruptibleIterator.html b/site/docs/1.6.3/api/java/org/apache/spark/InterruptibleIterator.html new file mode 100644 index 000..c847949 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/InterruptibleIterator.html @@ -0,0 +1,354 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +InterruptibleIterator (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. 
+ + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class InterruptibleIteratorT + + + +Object + + +org.apache.spark.InterruptibleIteratorT + + + + + + + +All Implemented Interfaces: +scala.collection.GenTraversableOnceT, scala.collection.IteratorT, scala.collection.TraversableOnceT + + + +public class
[35/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/allclasses-noframe.html -- diff --git a/site/docs/1.6.3/api/java/allclasses-noframe.html b/site/docs/1.6.3/api/java/allclasses-noframe.html new file mode 100644 index 000..6d204ed --- /dev/null +++ b/site/docs/1.6.3/api/java/allclasses-noframe.html @@ -0,0 +1,791 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +All Classes (Spark 1.6.3 JavaDoc) + + + + +All Classes + + +AbsoluteError +Accumulable +AccumulableInfo +AccumulableInfo +AccumulableParam +Accumulator +AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$ +AccumulatorParam.FloatAccumulatorParam$ +AccumulatorParam.IntAccumulatorParam$ +AccumulatorParam.LongAccumulatorParam$ +ActorHelper +ActorSupervisorStrategy +AFTAggregator +AFTCostFun +AFTSurvivalRegression +AFTSurvivalRegressionModel +AggregatedDialect +AggregatingEdgeContext +Aggregator +Aggregator +Algo +AlphaComponent +ALS +ALS +ALS.Rating +ALS.Rating$ +ALSModel +AnalysisException +And +ApplicationAttemptInfo +ApplicationInfo +ApplicationStatus +ArrayType +AskPermissionToCommitOutput +AssociationRules +AssociationRules.Rule +AsyncRDDActions +Attribute +AttributeGroup +AttributeType +BaseRelation +BaseRRDD +BatchInfo +BernoulliCellSampler +BernoulliSampler +Binarizer +BinaryAttribute +BinaryClassificationEvaluator +BinaryClassificationMetrics +BinaryLogisticRegressionSummary +BinaryLogisticRegressionTrainingSummary +BinarySample +BinaryType +BisectingKMeans +BisectingKMeansModel +BlockId +BlockManagerId +BlockMatrix +BlockNotFoundException +BlockStatus +BlockUpdatedInfo +BooleanParam +BooleanType +BoostingStrategy +BoundedDouble +Broadcast +BroadcastBlockId +BroadcastFactory +Broker +Bucketizer +BufferReleasingInputStream +ByteType +CalendarIntervalType +CatalystScan +CategoricalSplit +ChiSqSelector +ChiSqSelector +ChiSqSelectorModel +ChiSqSelectorModel +ChiSqTestResult +ClassificationModel +ClassificationModel +Classifier +CleanAccum +CleanBroadcast 
+CleanCheckpoint +CleanRDD +CleanShuffle +CleanupTask +CleanupTaskWeakReference +CoGroupedRDD +CoGroupFunction +Column +ColumnName +ColumnPruner +ComplexFutureAction +CompressionCodec +ConnectedComponents +ConstantInputDStream +ContinuousSplit +CoordinateMatrix +CountVectorizer +CountVectorizerModel +CreatableRelationProvider +CrossValidator +CrossValidatorModel +DataFrame +DataFrameHolder +DataFrameNaFunctions +DataFrameReader +DataFrameStatFunctions +DataFrameWriter +Dataset +DatasetHolder +DataSourceRegister +DataType +DataTypes +DataValidators +DateType +DB2Dialect +DCT +Decimal +DecimalType +DecisionTree +DecisionTreeClassificationModel +DecisionTreeClassifier +DecisionTreeModel +DecisionTreeRegressionModel +DecisionTreeRegressor +DefaultSource +DenseMatrix +DenseVector +Dependency +DerbyDialect +DeserializationStream +DeveloperApi +DistributedLDAModel +DistributedLDAModel +DistributedMatrix +DoubleArrayParam +DoubleFlatMapFunction +DoubleFunction +DoubleParam +DoubleRDDFunctions +DoubleType +DStream +DummySerializerInstance +Duration +Durations +Edge +EdgeActiveness +EdgeContext +EdgeDirection +EdgeRDD +EdgeRDDImpl +EdgeTriplet +ElementwiseProduct +ElementwiseProduct +EMLDAOptimizer +Encoder +Encoders +Entropy +EnumUtil +EnvironmentListener +EqualNullSafe +EqualTo +Estimator +Evaluator +ExceptionFailure +ExecutionListenerManager +ExecutorInfo +ExecutorLostFailure +ExecutorRegistered +ExecutorRemoved +ExecutorsListener +ExecutorStageSummary +ExecutorSummary +ExpectationSum +Experimental +ExperimentalMethods +ExponentialGenerator +FeatureType +FetchFailed +Filter +FilterFunction +FlatMapFunction +FlatMapFunction2 +FlatMapGroupsFunction +FloatParam +FloatType +FlumeUtils +ForeachFunction +ForeachPartitionFunction +FPGrowth +FPGrowth.FreqItemset +FPGrowthModel +Function +Function0 +Function2 +Function3 +Function4 +functions +FutureAction +GammaGenerator +GaussianMixture +GaussianMixtureModel +GBTClassificationModel +GBTClassifier +GBTRegressionModel +GBTRegressor 
+GeneralizedLinearAlgorithm +GeneralizedLinearModel +Gini +Gradient +GradientBoostedTrees +GradientBoostedTreesModel +GradientDescent +Graph +GraphGenerators +GraphImpl +GraphKryoRegistrator +GraphLoader +GraphOps +GraphXUtils +GreaterThan +GreaterThanOrEqual +GroupedData +GroupedDataset +HadoopFsRelation +HadoopFsRelation.FakeFileStatus +HadoopFsRelation.FakeFileStatus$ +HadoopFsRelationProvider +HadoopRDD +HashingTF +HashingTF +HashPartitioner +HasOffsetRanges +HingeGradient +HiveContext +HttpBroadcastFactory +Identifiable +IDF +IDF +IDF.DocumentFrequencyAggregator +IDFModel +IDFModel +Impurity +In +IndexedRow +IndexedRowMatrix +IndexToString +InformationGainStats +InnerClosureFinder +InputDStream +InputFormatInfo +InputMetricDistributions +InputMetrics +InsertableRelation +IntArrayParam +IntegerType +Interaction +InternalNode +InterruptibleIterator +IntParam +IsNotNull +IsNull +IsotonicRegression
[22/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.DoubleAccumulatorParam$.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.DoubleAccumulatorParam$.html b/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.DoubleAccumulatorParam$.html new file mode 100644 index 000..47a9681 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.DoubleAccumulatorParam$.html @@ -0,0 +1,347 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SparkContext.DoubleAccumulatorParam$ (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class SparkContext.DoubleAccumulatorParam$ + + + +Object + + +org.apache.spark.SparkContext.DoubleAccumulatorParam$ + + + + + + + +All Implemented Interfaces: +java.io.Serializable, AccumulableParamObject,Object, AccumulatorParamObject + + +Enclosing class: +SparkContext + + + +public static class SparkContext.DoubleAccumulatorParam$ +extends Object +implements AccumulatorParamObject +See Also:Serialized Form + + + + + + + + + + + +Nested Class Summary + + + + +Nested classes/interfaces inherited from interfaceorg.apache.spark.AccumulatorParam +AccumulatorParam.DoubleAccumulatorParam$, AccumulatorParam.FloatAccumulatorParam$, AccumulatorParam.IntAccumulatorParam$, AccumulatorParam.LongAccumulatorParam$ + + + + + + + + +Field Summary + +Fields + +Modifier and Type +Field and Description + + +static AccumulatorParam.DoubleAccumulatorParam$ +MODULE$ +Static reference to the singleton instance of this Scala object. 
+ + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +SparkContext.DoubleAccumulatorParam$() + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +double +addInPlace(doublet1, + doublet2) + + +double +zero(doubleinitialValue) + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.AccumulatorParam +addAccumulator + + + + + +Methods inherited from interfaceorg.apache.spark.AccumulableParam +addInPlace, zero + + + + + + + + + + + + + + +Field Detail + + + + + +MODULE$ +public static finalAccumulatorParam.DoubleAccumulatorParam$ MODULE$ +Static reference to the singleton instance of this Scala object. + + + + + + + + + +Constructor Detail + + + + + +SparkContext.DoubleAccumulatorParam$ +publicSparkContext.DoubleAccumulatorParam$() + + + + + + + + + +Method Detail + + + + + +addInPlace +publicdoubleaddInPlace(doublet1, +doublet2) + + + + + + + +zero +publicdoublezero(doubleinitialValue) + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.FloatAccumulatorParam$.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.FloatAccumulatorParam$.html b/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.FloatAccumulatorParam$.html new file mode 100644 index 000..1b65739 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/SparkContext.FloatAccumulatorParam$.html @@ -0,0 +1,347 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +SparkContext.FloatAccumulatorParam$ (Spark 1.6.3 JavaDoc) + + + + + + + 
+JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + +
[02/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphLoader.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphLoader.html b/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphLoader.html new file mode 100644 index 000..eb8e9e9 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphLoader.html @@ -0,0 +1,291 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +GraphLoader (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.graphx +Class GraphLoader + + + +Object + + +org.apache.spark.graphx.GraphLoader + + + + + + + +All Implemented Interfaces: +Logging + + + +public class GraphLoader +extends Object +implements Logging +Provides utilities for loading Graphs from files. + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +GraphLoader() + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +static GraphObject,Object +edgeListFile(SparkContextsc, +Stringpath, +booleancanonicalOrientation, +intnumEdgePartitions, +StorageLeveledgeStorageLevel, +StorageLevelvertexStorageLevel) +Loads a graph from an edge list formatted file where each line contains two integers: a source + id and a target id. 
+ + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.Logging +initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning + + + + + + + + + + + + + + +Constructor Detail + + + + + +GraphLoader +publicGraphLoader() + + + + + + + + + +Method Detail + + + + + +edgeListFile +public staticGraphObject,ObjectedgeListFile(SparkContextsc, +Stringpath, +booleancanonicalOrientation, +intnumEdgePartitions, +StorageLeveledgeStorageLevel, +StorageLevelvertexStorageLevel) +Loads a graph from an edge list formatted file where each line contains two integers: a source + id and a target id. Skips lines that begin with #. + + If desired the edges can be automatically oriented in the positive + direction (source Id < target Id) by setting canonicalOrientation to + true. 
+ +Parameters:sc - SparkContextpath - the path to the file (e.g., /home/data/file or hdfs://file)canonicalOrientation - whether to orient edges in the positive +directionnumEdgePartitions - the number of partitions for the edge RDD + Setting this value to -1 will use the default parallelism.edgeStorageLevel - the desired storage level for the edge partitionsvertexStorageLevel - the desired storage level for the vertex partitions +Returns:(undocumented) + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphOps.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphOps.html b/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphOps.html new file mode 100644 index 000..fee8a5a --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/graphx/GraphOps.html @@ -0,0 +1,735 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +GraphOps (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on
[07/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-summary.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-summary.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-summary.html new file mode 100644 index 000..a041420 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-summary.html @@ -0,0 +1,196 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +org.apache.spark.api.java (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Package +Next Package + + +Frames +No Frames + + +All Classes + + + + + + + + + + +Packageorg.apache.spark.api.java + +Spark Java programming APIs. + +See:Description + + + + + +Interface Summary + +Interface +Description + + + +JavaFutureActionT + + + +JavaRDDLikeT,This extends JavaRDDLikeT,This + +Defines operations common to several Java RDD implementations. + + + + + + + +Class Summary + +Class +Description + + + +JavaDoubleRDD + + + +JavaHadoopRDDK,V + + + +JavaNewHadoopRDDK,V + + + +JavaPairRDDK,V + + + +JavaRDDT + + + +JavaSparkContext + +A Java-friendly version of SparkContext that returns + JavaRDDs and works with Java collections instead of Scala ones. + + + +JavaSparkStatusTracker + +Low-level status reporting APIs for monitoring job and stage progress. + + + +StorageLevels + +Expose some commonly useful storage level constants. + + + + + + + + + +Package org.apache.spark.api.java Description +Spark Java programming APIs. 
+ + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Package +Next Package + + +Frames +No Frames + + +All Classes + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-tree.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-tree.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-tree.html new file mode 100644 index 000..e3ed5d6 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/package-tree.html @@ -0,0 +1,153 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +org.apache.spark.api.java Class Hierarchy (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + +Hierarchy For Package org.apache.spark.api.java +Package Hierarchies: + +All Packages + + + +Class Hierarchy + +Object + +org.apache.spark.api.java.JavaDoubleRDD +org.apache.spark.api.java.JavaPairRDDK,V + +org.apache.spark.api.java.JavaHadoopRDDK,V +org.apache.spark.api.java.JavaNewHadoopRDDK,V + + +org.apache.spark.api.java.JavaRDDT +org.apache.spark.api.java.JavaSparkContext (implements java.io.Closeable) +org.apache.spark.api.java.JavaSparkStatusTracker +org.apache.spark.api.java.StorageLevels + + + +Interface Hierarchy + +java.util.concurrent.FutureV + +org.apache.spark.api.java.JavaFutureActionT + + +java.io.Serializable + +scala.Serializable + +org.apache.spark.api.java.JavaRDDLikeT,This + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev +Next + + +Frames +No Frames + + +All Classes + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/r/BaseRRDD.html -- diff
[32/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/index.html -- diff --git a/site/docs/1.6.3/api/java/index.html b/site/docs/1.6.3/api/java/index.html new file mode 100644 index 000..f19f028 --- /dev/null +++ b/site/docs/1.6.3/api/java/index.html @@ -0,0 +1,74 @@ +http://www.w3.org/TR/html4/frameset.dtd;> + + + + +Spark 1.6.3 JavaDoc + +targetPage = "" + window.location.search; +if (targetPage != "" && targetPage != "undefined") +targetPage = targetPage.substring(1); +if (targetPage.indexOf(":") != -1 || (targetPage != "" && !validURL(targetPage))) +targetPage = "undefined"; +function validURL(url) { +try { +url = decodeURIComponent(url); +} +catch (error) { +return false; +} +var pos = url.indexOf(".html"); +if (pos == -1 || pos != url.length - 5) +return false; +var allowNumber = false; +var allowSep = false; +var seenDot = false; +for (var i = 0; i < url.length - 5; i++) { +var ch = url.charAt(i); +if ('a' <= ch && ch <= 'z' || +'A' <= ch && ch <= 'Z' || +ch == '$' || +ch == '_' || +ch.charCodeAt(0) > 127) { +allowNumber = true; +allowSep = true; +} else if ('0' <= ch && ch <= '9' +|| ch == '-') { +if (!allowNumber) + return false; +} else if (ch == '/' || ch == '.') { +if (!allowSep) +return false; +allowNumber = false; +allowSep = false; +if (ch == '.') + seenDot = true; +if (ch == '/' && seenDot) + return false; +} else { +return false; +} +} +return true; +} +function loadFrames() { +if (targetPage != "" && targetPage != "undefined") + top.classFrame.location = top.targetPage; +} + + + + + + + + + + +JavaScript is disabled on your browser. + +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to Non-frame version. 
+ + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/lib/api-javadocs.js -- diff --git a/site/docs/1.6.3/api/java/lib/api-javadocs.js b/site/docs/1.6.3/api/java/lib/api-javadocs.js new file mode 100644 index 000..ead13d6 --- /dev/null +++ b/site/docs/1.6.3/api/java/lib/api-javadocs.js @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Dynamically injected post-processing code for the API docs */ + +$(document).ready(function() { + addBadges(":: AlphaComponent ::", 'Alpha Component'); + addBadges(":: DeveloperApi ::", 'Developer API'); + addBadges(":: Experimental ::", 'Experimental'); +}); + +function addBadges(tag, html) { + var tags = $(".block:contains(" + tag + ")") + + // Remove identifier tags + tags.each(function(index) { +var oldHTML = $(this).html(); +var newHTML = oldHTML.replace(tag, ""); +$(this).html(newHTML); + }); + + // Add html badge tags + tags.each(function(index) { +if ($(this).parent().is('td.colLast')) { + $(this).parent().prepend(html); +} else if ($(this).parent('li.blockList') + .parent('ul.blockList') + .parent('div.description') + .parent().is('div.contentContainer')) { + var contentContainer = $(this).parent('li.blockList') +.parent('ul.blockList') +.parent('div.description') +.parent('div.contentContainer') + var header = contentContainer.prev('div.header'); + if (header.length > 0) { +header.prepend(html); + } else { +
[27/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/ExceptionFailure.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/ExceptionFailure.html b/site/docs/1.6.3/api/java/org/apache/spark/ExceptionFailure.html new file mode 100644 index 000..9ac2241 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/ExceptionFailure.html @@ -0,0 +1,391 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +ExceptionFailure (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class ExceptionFailure + + + +Object + + +org.apache.spark.ExceptionFailure + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class ExceptionFailure +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task failed due to a runtime exception. This is the most common failure case and also captures + user program exceptions. + + stackTrace contains the stack trace of the exception itself. It still exists for backward + compatibility. It's better to use this(e: Throwable, metrics: Option[TaskMetrics]) to + create ExceptionFailure as it will handle the backward compatibility properly. + + fullStackTrace is a better representation of the stack trace because it contains the whole + stack trace including the exception and its causes + + exception is the actual exception that caused the task to fail. It may be None in + the case that the exception is not in fact serializable. If a task fails more than + once (due to retries), exception is that one that caused the last failure. 
+See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +ExceptionFailure(StringclassName, +Stringdescription, +StackTraceElement[]stackTrace, +StringfullStackTrace, + scala.Optionorg.apache.spark.executor.TaskMetricsmetrics, + scala.Optionorg.apache.spark.ThrowableSerializationWrapperexceptionWrapper) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +String +className() + + +String +description() + + +scala.OptionThrowable +exception() + + +String +fullStackTrace() + + +scala.Optionorg.apache.spark.executor.TaskMetrics +metrics() + + +StackTraceElement[] +stackTrace() + + +String +toErrorString() +Error message displayed in the web UI. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.TaskFailedReason +countTowardsTaskFailures + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +ExceptionFailure +publicExceptionFailure(StringclassName, +Stringdescription, +StackTraceElement[]stackTrace, +StringfullStackTrace, + scala.Optionorg.apache.spark.executor.TaskMetricsmetrics, + scala.Optionorg.apache.spark.ThrowableSerializationWrapperexceptionWrapper) + + + + + + + + + +Method Detail + + + + + +className +publicStringclassName() + + + + + + + +description +publicStringdescription() + + + + + + + +stackTrace +publicStackTraceElement[]stackTrace() + + + + + + + +fullStackTrace +publicStringfullStackTrace() + + + + + + + +metrics +publicscala.Optionorg.apache.spark.executor.TaskMetricsmetrics() + + + + + + + +exception +publicscala.OptionThrowableexception() + + + + + + + +toErrorString +publicStringtoErrorString() +Description copied from 
interface:TaskFailedReason +Error message displayed in the web UI. + +Specified by: +toErrorStringin interfaceTaskFailedReason + + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java +Class JavaDoubleRDD + + + +Object + + +org.apache.spark.api.java.JavaDoubleRDD + + + + + + + +All Implemented Interfaces: +java.io.Serializable, JavaRDDLikeDouble,JavaDoubleRDD + + + +public class JavaDoubleRDD +extends Object +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +JavaDoubleRDD(RDDObjectsrdd) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +JavaDoubleRDD +cache() +Persist this RDD with the default storage level (`MEMORY_ONLY`). + + + +scala.reflect.ClassTagDouble +classTag() + + +JavaDoubleRDD +coalesce(intnumPartitions) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaDoubleRDD +coalesce(intnumPartitions, +booleanshuffle) +Return a new RDD that is reduced into numPartitions partitions. + + + +JavaDoubleRDD +distinct() +Return a new RDD containing the distinct elements in this RDD. + + + +JavaDoubleRDD +distinct(intnumPartitions) +Return a new RDD containing the distinct elements in this RDD. + + + +JavaDoubleRDD +filter(FunctionDouble,Booleanf) +Return a new RDD containing only the elements that satisfy a predicate. + + + +Double +first() +Return the first element in this RDD. 
+ + + +static JavaDoubleRDD +fromRDD(RDDObjectrdd) + + +long[] +histogram(double[]buckets) +Compute a histogram using the provided buckets. + + + +long[] +histogram(Double[]buckets, + booleanevenBuckets) + + +scala.Tuple2double[],long[] +histogram(intbucketCount) +Compute a histogram of the data using bucketCount number of buckets evenly + spaced between the minimum and maximum of the RDD. + + + +JavaDoubleRDD +intersection(JavaDoubleRDDother) +Return the intersection of this RDD and another one. + + + +Double +max() +Returns the maximum element from this RDD as defined by + the default comparator natural order. + + + +Double +mean() +Compute the mean of this RDD's elements. + + + +PartialResultBoundedDouble +meanApprox(longtimeout) +Approximate operation to return the mean within a timeout. + + + +PartialResultBoundedDouble +meanApprox(longtimeout, + Doubleconfidence) +Return the approximate mean of the elements in this RDD. + + + +Double +min() +Returns the minimum element from this RDD as defined by + the default comparator natural order. + + + +JavaDoubleRDD +persist(StorageLevelnewLevel) +Set this RDD's storage level to persist its values across operations after the first time + it is computed. + + + +RDDDouble +rdd() + + +JavaDoubleRDD +repartition(intnumPartitions) +Return a new RDD that has exactly numPartitions partitions. + + + +JavaDoubleRDD +sample(booleanwithReplacement, + Doublefraction) +Return a sampled subset of this RDD. + + + +JavaDoubleRDD +sample(booleanwithReplacement, + Doublefraction, + longseed) +Return a sampled subset of this RDD. + + + +Double +sampleStdev() +Compute the sample standard deviation of this RDD's elements (which corrects for bias in + estimating the standard deviation by dividing by N-1 instead of N). + + + +Double +sampleVariance() +Compute the sample variance of this RDD's elements (which corrects for bias in + estimating the standard variance by dividing by N-1 instead of N). 
+ + + +JavaDoubleRDD +setName(Stringname) +Assign a name to this RDD + + + +RDDObject +srdd() + + +StatCounter +stats() +Return a StatCounter object that captures the mean, variance and + count of the RDD's elements in one operation. + + + +Double +stdev() +Compute the standard deviation of this RDD's elements. + + + +JavaDoubleRDD +subtract(JavaDoubleRDDother) +Return an RDD with the elements from this that are not in other. +
[17/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/TaskResultLost.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/TaskResultLost.html b/site/docs/1.6.3/api/java/org/apache/spark/TaskResultLost.html new file mode 100644 index 000..ae025f8 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/TaskResultLost.html @@ -0,0 +1,284 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +TaskResultLost (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class TaskResultLost + + + +Object + + +org.apache.spark.TaskResultLost + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class TaskResultLost +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + The task finished successfully, but the result was lost from the executor's block manager before + it was fetched. 
+See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +TaskResultLost() + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +static String +toErrorString() + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.TaskFailedReason +countTowardsTaskFailures, toErrorString + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +TaskResultLost +publicTaskResultLost() + + + + + + + + + +Method Detail + + + + + +toErrorString +public staticStringtoErrorString() + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/UnknownReason.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/UnknownReason.html b/site/docs/1.6.3/api/java/org/apache/spark/UnknownReason.html new file mode 100644 index 000..22e0b7e --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/UnknownReason.html @@ -0,0 +1,284 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +UnknownReason (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. 
+ + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class UnknownReason + + + +Object + + +org.apache.spark.UnknownReason + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class UnknownReason +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + We don't know why the task ended -- for example, because of a ClassNotFound exception when + deserializing the task result. +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +UnknownReason() + + + + + + + + + +Method Summary + +Methods
[03/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/graphx/Graph.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/graphx/Graph.html b/site/docs/1.6.3/api/java/org/apache/spark/graphx/Graph.html new file mode 100644 index 000..7213e80 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/graphx/Graph.html @@ -0,0 +1,983 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Graph (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.graphx +Class GraphVD,ED + + + +Object + + +org.apache.spark.graphx.GraphVD,ED + + + + + + + +All Implemented Interfaces: +java.io.Serializable + + +Direct Known Subclasses: +GraphImpl + + + +public abstract class GraphVD,ED +extends Object +implements scala.Serializable +The Graph abstractly represents a graph with arbitrary objects + associated with vertices and edges. The graph provides basic + operations to access and manipulate the data associated with + vertices and edges as well as the underlying structure. Like Spark + RDDs, the graph is a functional data-structure in which mutating + operations return new graphs. + +See Also:Serialized Form + + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +AVertexRDDA +aggregateMessages(scala.Function1EdgeContextVD,ED,A,scala.runtime.BoxedUnitsendMsg, + scala.Function2A,A,AmergeMsg, + TripletFieldstripletFields, + scala.reflect.ClassTagAevidence$12) +Aggregates values from the neighboring edges and vertices of each vertex. 
+ + + +static VD,EDGraphVD,ED +apply(RDDscala.Tuple2Object,VDvertices, + RDDEdgeEDedges, + VDdefaultVertexAttr, + StorageLeveledgeStorageLevel, + StorageLevelvertexStorageLevel, + scala.reflect.ClassTagVDevidence$19, + scala.reflect.ClassTagEDevidence$20) +Construct a graph from a collection of vertices and + edges with attributes. + + + +abstract GraphVD,ED +cache() +Caches the vertices and edges associated with this graph at the previously-specified target + storage levels, which default to MEMORY_ONLY. + + + +abstract void +checkpoint() +Mark this Graph for checkpointing. + + + +abstract EdgeRDDED +edges() +An RDD containing the edges and their associated attributes. + + + +static VD,EDGraphVD,ED +fromEdges(RDDEdgeEDedges, + VDdefaultValue, + StorageLeveledgeStorageLevel, + StorageLevelvertexStorageLevel, + scala.reflect.ClassTagVDevidence$17, + scala.reflect.ClassTagEDevidence$18) +Construct a graph from a collection of edges. + + + +static VDGraphVD,Object +fromEdgeTuples(RDDscala.Tuple2Object,ObjectrawEdges, + VDdefaultValue, + scala.OptionPartitionStrategyuniqueEdges, + StorageLeveledgeStorageLevel, + StorageLevelvertexStorageLevel, + scala.reflect.ClassTagVDevidence$16) +Construct a graph from a collection of edges encoded as vertex id pairs. + + + +abstract scala.collection.SeqString +getCheckpointFiles() +Gets the name of the files to which this Graph was checkpointed. + + + +static VD,EDGraphOpsVD,ED +graphToGraphOps(GraphVD,EDg, + scala.reflect.ClassTagVDevidence$21, + scala.reflect.ClassTagEDevidence$22) +Implicitly extracts the GraphOps member from a graph. + + + +abstract GraphVD,ED +groupEdges(scala.Function2ED,ED,EDmerge) +Merges multiple edges between two vertices into a single edge. + + + +abstract boolean +isCheckpointed() +Return whether this Graph has been checkpointed or not. + + + +ED2GraphVD,ED2 +mapEdges(scala.Function1EdgeED,ED2map, +scala.reflect.ClassTagED2evidence$4) +Transforms each edge attribute in the graph using the map function. 
+ + + +abstract ED2GraphVD,ED2 +mapEdges(scala.Function2Object,scala.collection.IteratorEdgeED,scala.collection.IteratorED2map, +scala.reflect.ClassTagED2evidence$5) +Transforms each edge attribute using the map function, passing it a whole partition at a + time. + + + +abstract AVertexRDDA +mapReduceTriplets(scala.Function1EdgeTripletVD,ED,scala.collection.Iteratorscala.Tuple2Object,AmapFunc, + scala.Function2A,A,AreduceFunc, +
[45/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/hashCode.html -- diff --git a/site/docs/1.6.3/api/R/hashCode.html b/site/docs/1.6.3/api/R/hashCode.html new file mode 100644 index 000..ba178a2 --- /dev/null +++ b/site/docs/1.6.3/api/R/hashCode.html @@ -0,0 +1,74 @@ + +R: Compute the hashCode of an object + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +hashCode {SparkR}R Documentation + +Compute the hashCode of an object + +Description + +Java-style function to compute the hashCode for the given object. Returns +an integer value. + + + +Usage + + +hashCode(key) + + + +Arguments + + +key + +the object to be hashed + + + + +Details + +This only works for integer, numeric and character types right now. + + + +Value + +the hash code as an integer + + + +Examples + +hashCode(1L) # 1 + + +## Error in eval(expr, envir, enclos): could not find function hashCode + + +hashCode(1.0) # 1072693248 + + +## Error in eval(expr, envir, enclos): could not find function hashCode + + +hashCode(1) # 49 + + +## Error in eval(expr, envir, enclos): could not find function hashCode + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/head.html -- diff --git a/site/docs/1.6.3/api/R/head.html b/site/docs/1.6.3/api/R/head.html new file mode 100644 index 000..2347d4e --- /dev/null +++ b/site/docs/1.6.3/api/R/head.html @@ -0,0 +1,146 @@ + +R: Head + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +head {SparkR}R Documentation + 
+Head + +Description + +Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL, +then head() returns the first 6 rows in keeping with the current data.frame +convention in R. + + + +Usage + + +## S4 method for signature 'DataFrame' +head(x, num = 6L) + + + +Arguments + + +x + +A SparkSQL DataFrame + +num + +The number of rows to return. Default is 6. + + + + +Value + +A data.frame + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; agg, +agg, +count,GroupedData-method, +summarize, summarize; +arrange, arrange, +arrange, orderBy, +orderBy; as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes; +except, except; +explain, explain; +filter, filter, +where, where; +first, first; +groupBy, groupBy, +group_by, group_by; +insertInto, insertInto; +intersect, intersect; +isLocal, isLocal; +join; limit, +limit; merge, +merge; mutate, +mutate, transform, +transform; ncol; +persist; printSchema; +rbind, rbind, +unionAll, unionAll; +registerTempTable, +registerTempTable; rename, +rename, withColumnRenamed, +withColumnRenamed; +repartition; sample, +sample, sample_frac, +sample_frac; +saveAsParquetFile, +saveAsParquetFile, +write.parquet, write.parquet; +saveAsTable, saveAsTable; +saveDF, saveDF, +write.df, write.df, +write.df; selectExpr; +showDF, showDF; +show, show, +show,GroupedData-method; str; +take; unpersist; +withColumn, withColumn; +write.json, write.json; +write.text, write.text + + + +Examples + +## Not run: +##D 
sc - sparkR.init() +##D sqlContext - sparkRSQL.init(sc) +##D path - path/to/file.json +##D df - read.json(sqlContext, path) +##D head(df) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/hex.html -- diff --git a/site/docs/1.6.3/api/R/hex.html b/site/docs/1.6.3/api/R/hex.html new file mode 100644 index 000..cf775d2 --- /dev/null +++ b/site/docs/1.6.3/api/R/hex.html @@ -0,0 +1,70 @@ + +R: hex + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js">
[06/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/r/RRDD.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/r/RRDD.html b/site/docs/1.6.3/api/java/org/apache/spark/api/r/RRDD.html new file mode 100644 index 000..7b25f0e --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/r/RRDD.html @@ -0,0 +1,365 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +RRDD (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.r +Class RRDDT + + + +Object + + +org.apache.spark.rdd.RDDU + + +org.apache.spark.api.r.BaseRRDDT,byte[] + + +org.apache.spark.api.r.RRDDT + + + + + + + + + + + +All Implemented Interfaces: +java.io.Serializable, Logging + + + +public class RRDDT +extends BaseRRDDT,byte[] +An RDD that stores serialized R objects as Array[Byte]. +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +RRDD(RDDTparent, +byte[]func, +Stringdeserializer, +Stringserializer, +byte[]packageNames, +Object[]broadcastVars, +scala.reflect.ClassTagTevidence$4) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +JavaRDDbyte[] +asJavaRDD() + + +static JavaRDDbyte[] +createRDDFromArray(JavaSparkContextjsc, + byte[][]arr) +Create an RRDD given a sequence of byte arrays. + + + +static org.apache.spark.api.r.BufferedStreamThread +createRWorker(intport) +ProcessBuilder used to launch worker R processes. 
+ + + +static JavaSparkContext +createSparkContext(Stringmaster, + StringappName, + StringsparkHome, + String[]jars, + java.util.MapObject,ObjectsparkEnvirMap, + java.util.MapObject,ObjectsparkExecutorEnvMap) + + + + + + +Methods inherited from classorg.apache.spark.api.r.BaseRRDD +compute, getPartitions + + + + + +Methods inherited from classorg.apache.spark.rdd.RDD +aggregate, cache, cartesian, checkpoint, checkpointData, coalesce, collect, collect, context, c ount, countApprox, countApproxDistinct, countApproxDistinct, countByValue, countByValueApprox, creationSite, dependencies, distinct, distinct, doubleRDDToDoubleRDDFunctions, filter, filterWith, first, flatMap, flatMapWith, fold, foreach, foreachPartition, foreachWith, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, groupBy, id, intersection, intersection, intersection, isCheckpointed, isEmpty, iterator, keyBy, localCheckpoint, map, mapPartitions, mapPartitionsWithContext, mapPartitionsWithIndex, mapPartitionsWithSplit, mapWith, max, min, name, numericRDDToDoubleRDDFunctions, partitioner, partitions, persist, persist, pipe, pipe, pipe, preferredLocations, randomSplit, rddToAsyncRDDActions, rddToOrderedRDDFunctions, rddToPairRDDFunctions, rddToSequenceFileRDDFunctions, reduce, repartition, sample, saveAsObjectFile, saveAsTextFile, saveAsTextFile, scope, setName, sortBy, sparkContext, subtract, subtract, subtract, take, takeOrdered, takeSample, toArray, toDebugString, toJavaRDD, toLocalIterator, top, toString, treeAggregate, treeReduce, union, unpersist, zip, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipWithIndex, zipWithUniqueId + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait + + + + + +Methods inherited from interfaceorg.apache.spark.Logging +initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, 
logName, logTrace, logTrace, logWarning, logWarning + + + + + + + + + + + + + + +Constructor Detail + + + + + +RRDD +publicRRDD(RDDTparent, +byte[]func, +Stringdeserializer, +Stringserializer, +byte[]packageNames, +Object[]broadcastVars, +scala.reflect.ClassTagTevidence$4) + + + + + + + + + +Method Detail + + + + + +createSparkContext +public staticJavaSparkContextcreateSparkContext(Stringmaster, + StringappName, + StringsparkHome, +
[47/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/cume_dist.html -- diff --git a/site/docs/1.6.3/api/R/cume_dist.html b/site/docs/1.6.3/api/R/cume_dist.html new file mode 100644 index 000..2fbb3cc --- /dev/null +++ b/site/docs/1.6.3/api/R/cume_dist.html @@ -0,0 +1,62 @@ + +R: cume_dist + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +cume_dist {SparkR}R Documentation + +cume_dist + +Description + +Window function: returns the cumulative distribution of values within a window partition, +i.e. the fraction of rows that are below the current row. + + + +Usage + + +## S4 method for signature 'missing' +cume_dist() + +cume_dist(x) + + + +Details + +N = total number of rows in the partition +cume_dist(x) = number of values before (and including) x / N + +This is equivalent to the CUME_DIST function in SQL. 
+ + + +See Also + +Other window_funcs: dense_rank, +dense_rank; lag, +lag; lead, +lead; ntile, +ntile; percent_rank, +percent_rank; rank, +rank; row_number, +row_number + + + +Examples + +## Not run: cume_dist() + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/date_add.html -- diff --git a/site/docs/1.6.3/api/R/date_add.html b/site/docs/1.6.3/api/R/date_add.html new file mode 100644 index 000..f372923 --- /dev/null +++ b/site/docs/1.6.3/api/R/date_add.html @@ -0,0 +1,70 @@ + +R: date_add + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +date_add {SparkR}R Documentation + +date_add + +Description + +Returns the date that is 'days' days after 'start' + + + +Usage + + +## S4 method for signature 'Column,numeric' +date_add(y, x) + +date_add(y, x) + + + +See Also + +Other datetime_funcs: add_months, +add_months; date_format, +date_format; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; months_between, +months_between; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: date_add(df$d, 1) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/date_format.html -- diff --git a/site/docs/1.6.3/api/R/date_format.html b/site/docs/1.6.3/api/R/date_format.html new file mode 100644 index 000..0050043 --- /dev/null +++ 
b/site/docs/1.6.3/api/R/date_format.html @@ -0,0 +1,80 @@ + +R: date_format + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +date_format {SparkR}R Documentation + +date_format + +Description + +Converts a date/timestamp/string to a value of string in the format specified by the date +format given by the second argument. + + + +Usage + + +## S4 method for signature 'Column,character' +date_format(y, x) + +date_format(y, x) + + + +Details + +A pattern could be for instance +dd.MM. and could return a string like '18.03.1993'. All +pattern letters of java.text.SimpleDateFormat can be used. +NOTE: Use when ever possible specialized functions like year. These benefit from a +specialized implementation. + + +See Also + +Other datetime_funcs: add_months, +add_months; date_add, +date_add; date_sub, +date_sub; datediff, +datediff; dayofmonth, +dayofmonth; dayofyear, +dayofyear; from_unixtime, +from_unixtime; +from_utc_timestamp, +from_utc_timestamp; hour, +hour; last_day, +last_day; minute, +minute; months_between, +months_between; month, +month; next_day, +next_day; quarter, +quarter; second, +second; to_date, +to_date; to_utc_timestamp, +to_utc_timestamp; +unix_timestamp, +unix_timestamp, +unix_timestamp, +unix_timestamp; weekofyear, +weekofyear; year, +year + + + +Examples + +## Not run: date_format(df$t, MM/dd/yyy) + + + +[Package SparkR version 1.6.3 Index] +
[41/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/row_number.html -- diff --git a/site/docs/1.6.3/api/R/row_number.html b/site/docs/1.6.3/api/R/row_number.html new file mode 100644 index 000..9ca48d8 --- /dev/null +++ b/site/docs/1.6.3/api/R/row_number.html @@ -0,0 +1,58 @@ + +R: row_number + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +row_number {SparkR}R Documentation + +row_number + +Description + +Window function: returns a sequential number starting at 1 within a window partition. + + + +Usage + + +## S4 method for signature 'missing' +row_number() + +row_number(x) + + + +Details + +This is equivalent to the ROW_NUMBER function in SQL. + + + +See Also + +Other window_funcs: cume_dist, +cume_dist; dense_rank, +dense_rank; lag, +lag; lead, +lead; ntile, +ntile; percent_rank, +percent_rank; rank, +rank + + + +Examples + +## Not run: row_number() + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/rpad.html -- diff --git a/site/docs/1.6.3/api/R/rpad.html b/site/docs/1.6.3/api/R/rpad.html new file mode 100644 index 000..2859dec --- /dev/null +++ b/site/docs/1.6.3/api/R/rpad.html @@ -0,0 +1,72 @@ + +R: rpad + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +rpad {SparkR}R Documentation + +rpad + +Description + +Right-padded with pad to a length of len. 
+ + + +Usage + + +## S4 method for signature 'Column,numeric,character' +rpad(x, len, pad) + +rpad(x, len, pad) + + + +See Also + +Other string_funcs: ascii, +ascii; base64, +base64; concat_ws, +concat_ws; concat, +concat; decode, +decode; encode, +encode; format_number, +format_number; format_string, +format_string; initcap, +initcap; instr, +instr; length; +levenshtein, levenshtein; +locate, locate; +lower, lower; +lpad, lpad; +ltrim, ltrim; +regexp_extract, +regexp_extract; +regexp_replace, +regexp_replace; reverse, +reverse; rtrim, +rtrim; soundex, +soundex; substring_index, +substring_index; translate, +translate; trim, +trim; unbase64, +unbase64; upper, +upper + + + +Examples + +## Not run: rpad(df$c, 6, #) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/rtrim.html -- diff --git a/site/docs/1.6.3/api/R/rtrim.html b/site/docs/1.6.3/api/R/rtrim.html new file mode 100644 index 000..f8ee6e1 --- /dev/null +++ b/site/docs/1.6.3/api/R/rtrim.html @@ -0,0 +1,72 @@ + +R: rtrim + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +rtrim {SparkR}R Documentation + +rtrim + +Description + +Trim the spaces from right end for the specified string value. 
+ + + +Usage + + +## S4 method for signature 'Column' +rtrim(x) + +rtrim(x) + + + +See Also + +Other string_funcs: ascii, +ascii; base64, +base64; concat_ws, +concat_ws; concat, +concat; decode, +decode; encode, +encode; format_number, +format_number; format_string, +format_string; initcap, +initcap; instr, +instr; length; +levenshtein, levenshtein; +locate, locate; +lower, lower; +lpad, lpad; +ltrim, ltrim; +regexp_extract, +regexp_extract; +regexp_replace, +regexp_replace; reverse, +reverse; rpad, +rpad; soundex, +soundex; substring_index, +substring_index; translate, +translate; trim, +trim; unbase64, +unbase64; upper, +upper + + + +Examples + +## Not run: rtrim(df$c) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/sample.html -- diff --git a/site/docs/1.6.3/api/R/sample.html b/site/docs/1.6.3/api/R/sample.html new file mode 100644 index 000..9fe6b56 --- /dev/null +++ b/site/docs/1.6.3/api/R/sample.html @@ -0,0 +1,153 @@ + +R: Sample + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js">
[09/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.html new file mode 100644 index 000..1eb7418 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.html @@ -0,0 +1,217 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +FlatMapGroupsFunction (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java.function +Interface FlatMapGroupsFunctionK,V,R + + + + + + +All Superinterfaces: +java.io.Serializable + + + +public interface FlatMapGroupsFunctionK,V,R +extends java.io.Serializable +A function that returns zero or more output records from each grouping key and its values. 
+ + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +IterableR +call(Kkey, +java.util.IteratorVvalues) + + + + + + + + + + + + + + + +Method Detail + + + + + + + +call +IterableRcall(Kkey, + java.util.IteratorVvalues) + throws Exception +Throws: +Exception + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/ForeachFunction.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/ForeachFunction.html b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/ForeachFunction.html new file mode 100644 index 000..eda70d3 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/api/java/function/ForeachFunction.html @@ -0,0 +1,217 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +ForeachFunction (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.api.java.function +Interface ForeachFunctionT + + + + + + +All Superinterfaces: +java.io.Serializable + + + +public interface ForeachFunctionT +extends java.io.Serializable +Base interface for a function used in Dataset's foreach function. + + Spark will invoke the call function on each element in the input Dataset. 
+ + + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +void +call(Tt) + + + + + + + + + + + + + + + +Method Detail + + + + + + + +call +voidcall(Tt) + throws Exception +Throws: +Exception + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + +
[25/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/Logging.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/Logging.html b/site/docs/1.6.3/api/java/org/apache/spark/Logging.html new file mode 100644 index 000..c26f499 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/Logging.html @@ -0,0 +1,423 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Logging (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Interface Logging + + + + + + +All Known Subinterfaces: +ActorHelper + + +All Known Implementing Classes: +AFTSurvivalRegression, AFTSurvivalRegressionModel, ALS, ALS, ALSModel, AssociationRules, AsyncRDDActions, BaseRRDD, Binarizer, BinaryClassificationMetrics, BisectingKMeans, BisectingKMeansModel, BlockMatrix, Broadcast, Bucketizer, ChiSqSelector, ChiSqSelectorModel, ClassificationModel, Classifier, CoGroupedRDD, Column, ColumnName, ColumnPruner, ConstantInputDStream, CountVectorizer, CountVectorizerModel, CrossValidator, CrossValidatorModel, DataFrameReader, Dataset, DataValidators, DCT, DecisionTree, DecisionTreeClassificationModel, DecisionTreeClassifier, DecisionTreeRegressionModel, DecisionTreeRegressor, DistributedLDAModel, DoubleRDDFunctions, DStream, EdgeRDD, EdgeRDDImpl, ElementwiseProduct, Estimator, ExecutionListenerManager, FPGrowth, GBTClassificationModel, GBTClassifier, GBTRegressionModel, GBTRegressor, GeneralizedLinearAlgorithm, GradientBoostedTrees, GradientDescent, GraphGenerators, GraphLoader, HadoopFsRelation, HadoopRDD, HashingTF, HiveCon text, IDF, IDFModel, IndexToString, InputDStream, InputFormatInfo, Interaction, IsotonicRegression, IsotonicRegressionModel, 
JdbcRDD, JobLogger, JobProgressListener, KMeans, KMeans, KMeansModel, KryoSerializer, LassoWithSGD, LBFGS, LDA, LDA, LDAModel, LinearRegression, LinearRegressionModel, LinearRegressionWithSGD, LocalLDAModel, LogisticRegression, LogisticRegressionModel, LogisticRegressionWithLBFGS, LogisticRegressionWithSGD, MapWithStateDSt ream, MatrixFactorizationModel, MinMaxScaler, MinMaxScalerModel, MLWriter, Model, MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier, href="../../../org/apache/spark/ml/classification/NaiveBayes.html" >title="class in org.apache.spark.ml.classification">NaiveBayes, href="../../../org/apache/spark/mllib/classification/NaiveBayes.html" >title="class in org.apache.spark.mllib.classification">NaiveBayes, href="../../../org/apache/spark/ml/classification/NaiveBayesModel.html" >title="class in org.apache.spark.ml.classification">NaiveBayesModel, href="../../../org/apache/spark/rdd/NewHadoopRDD.html" title="class in >org.apache.spark.rdd">NewHadoopRDD, href="../../../org/apache/spark/ml/feature/NGram.html" title="class in >org.apache.spark.ml.feature">NGram, href="../../../org/apache/spark/mllib/tree/model/Node.html" title="class in >org.apache.spark.mllib.tree.model">Node, href="../../../org/apache/spark/ml/feature/Normalizer.html" title="class in >org.apache.spark.ml.feature">Normalizer, href="../../../org/apache/spark/ml/feature/OneHotEn coder.html" title="class in org.apache.spark.ml.feature">OneHotEncoder, OneVsRest, OneVsRestModel, OrderedRDDFunctions, PageRank, PairRDDFunctions, PairwiseRRDD, PartitionPruningRDD, PCA, PCAModel, Pipeline, PipelineModel, PipelineStage, PolynomialExpansion, PredictionModel, Predictor, PrefixSpan, Pregel, ProbabilisticClassificationModel, ProbabilisticClassifier, QuantileDiscretizer, RandomForest, RandomForestClassificationModel, RandomForestClassifier, RandomForestRegressionModel, RandomForestRegressor, RankingMetrics, RDD, ReceiverInputDStream, RegexTokenizer, RegressionMetrics, 
RegressionModel, RFormula, RFormulaModel, RidgeRegressionWithSGD, RowMatrix, RRDD, ScriptTransformationWriterThread, SequenceFileRDDFunctions, ShuffledRDD, SizeEstimator, SparkConf, SparkContext, SparkEnv, SQLContext, SQLTransformer, StandardScaler, StandardScaler, StandardScalerModel, StatsReportListener, StopWordsRemover, StreamingContext, StreamingKMeans, StreamingKMeansModel, StreamingLinearAlgorithm, StreamingLinearRegressionWithSGD, StreamingLogisticRegressionWithSGD, StreamingTest, StringIndexer, StringIndexerModel, StringRRDD, SVMWithSGD, Tokenizer,
[18/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/Success.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/Success.html b/site/docs/1.6.3/api/java/org/apache/spark/Success.html new file mode 100644 index 000..6427e5a --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/Success.html @@ -0,0 +1,248 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +Success (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class Success + + + +Object + + +org.apache.spark.Success + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, scala.Equals, scala.Product + + + +public class Success +extends Object +implements TaskEndReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task succeeded. 
+See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +Success() + + + + + + + + + +Method Summary + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + +Methods inherited from interfacescala.Product +productArity, productElement, productIterator, productPrefix + + + + + +Methods inherited from interfacescala.Equals +canEqual, equals + + + + + + + + + + + + + + +Constructor Detail + + + + + +Success +publicSuccess() + + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/TaskCommitDenied.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/TaskCommitDenied.html b/site/docs/1.6.3/api/java/org/apache/spark/TaskCommitDenied.html new file mode 100644 index 000..9b8150e --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/TaskCommitDenied.html @@ -0,0 +1,350 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +TaskCommitDenied (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. 
+ + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark +Class TaskCommitDenied + + + +Object + + +org.apache.spark.TaskCommitDenied + + + + + + + +All Implemented Interfaces: +java.io.Serializable, TaskEndReason, TaskFailedReason, scala.Equals, scala.Product + + + +public class TaskCommitDenied +extends Object +implements TaskFailedReason, scala.Product, scala.Serializable +:: DeveloperApi :: + Task requested the driver to commit, but was denied. +See Also:Serialized Form + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +TaskCommitDenied(intjobID, +intpartitionID, +intattemptNumber) + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +int +attemptNumber() + + +boolean +countTowardsTaskFailures() +If a task failed because its attempt to commit was denied, do not count this failure + towards failing the stage. + + + +int +jobID() + + +int +partitionID() + + +String +toErrorString() +Error message displayed in the web UI. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + +
[31/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/lib/jquery.js -- diff --git a/site/docs/1.6.3/api/java/lib/jquery.js b/site/docs/1.6.3/api/java/lib/jquery.js new file mode 100644 index 000..bc3fbc8 --- /dev/null +++ b/site/docs/1.6.3/api/java/lib/jquery.js @@ -0,0 +1,2 @@ +/*! jQuery v1.8.2 jquery.com | jquery.org/license */ +(function(a,b){function G(a){var b=F[a]={};return p.each(a.split(s),function(a,c){b[c]=!0}),b}function J(a,c,d){if(d===b&===1){var e="data-"+c.replace(I,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:+d+""===d?+d:H.test(d)?p.parseJSON(d):d}catch(f){}p.data(a,c,d)}else d=b}return d}function K(a){var b;for(b in a){if(b==="data"&(a[b]))continue;if(b!=="toJSON")return!1}return!0}function ba(){return!1}function bb(){return!0}function bh(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function bi(a,b){do a=a[b];while(a&!==1);return a}function bj(a,b,c){b=b||0;if(p.isFunction(b))return p.grep(a,function(a,d){var e=!!b.call(a,d,a);return e===c});if(b.nodeType)return p.grep(a,function(a,d){return a===b===c});if(typeof b=="string"){var d=p.grep(a,function(a){return a.nodeType===1});if(be.test(b))return p.filter(b,d,!c);b=p.filter(b,d)}return p.grep(a,function(a,d){return p.inArray( a,b)>=0===c})}function bk(a){var b=bl.split("|"),c=a.createDocumentFragment();if(c.createElement)while(b.length)c.createElement(b.pop());return c}function bC(a,b){return a.getElementsByTagName(b)[0]||a.appendChild(a.ownerDocument.createElement(b))}function bD(a,b){if(b.nodeType!==1||!p.hasData(a))return;var c,d,e,f=p._data(a),g=p._data(b,f),h=f.events;if(h){delete g.handle,g.events={};for(c in h)for(d=0,e=h[c].length;d").appendTo(e.body),c=b.css("display");b.remove();if(c==="none"||c===""){bI=e.body.appendChild(bI||p.extend(e.createElement("iframe"),{frameBorder:0,width:0,height:0}));if(!bJ||!bI. 
createElement)bJ=(bI.contentWindow||bI.contentDocument).document,bJ.write(""),bJ.close();b=bJ.body.appendChild(bJ.createElement(a)),c=bH(b,"display"),e.body.removeChild(bI)}return bS[a]=c,c}function ci(a,b,c,d){var e;if(p.isArray(b))p.each(b,function(b,e){c||ce.test(a)?d(a,e):ci(a+"["+(typeof e=="object"?b:"")+"]",e,c,d)});else if(!c&(b)==="object")for(e in b)ci(a+"["+e+"]",b[e],c,d);else d(a,b)}function cz(a){return function(b,c){typeof b!="string"&&(c=b,b="*");var
[04/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/graphx/EdgeContext.html -- diff --git a/site/docs/1.6.3/api/java/org/apache/spark/graphx/EdgeContext.html b/site/docs/1.6.3/api/java/org/apache/spark/graphx/EdgeContext.html new file mode 100644 index 000..b2057c9 --- /dev/null +++ b/site/docs/1.6.3/api/java/org/apache/spark/graphx/EdgeContext.html @@ -0,0 +1,404 @@ +http://www.w3.org/TR/html4/loose.dtd;> + + + + +EdgeContext (Spark 1.6.3 JavaDoc) + + + + + + + +JavaScript is disabled on your browser. + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + +org.apache.spark.graphx +Class EdgeContextVD,ED,A + + + +Object + + +org.apache.spark.graphx.EdgeContextVD,ED,A + + + + + + + +Direct Known Subclasses: +AggregatingEdgeContext + + + +public abstract class EdgeContextVD,ED,A +extends Object +Represents an edge along with its neighboring vertices and allows sending messages along the + edge. Used in Graph.aggregateMessages(scala.Function1org.apache.spark.graphx.EdgeContextVD, ED, A, scala.runtime.BoxedUnit, scala.Function2A, A, A, org.apache.spark.graphx.TripletFields, scala.reflect.ClassTagA). + + + + + + + + + + + +Constructor Summary + +Constructors + +Constructor and Description + + +EdgeContext() + + + + + + + + + +Method Summary + +Methods + +Modifier and Type +Method and Description + + +abstract ED +attr() +The attribute associated with the edge. + + + +abstract VD +dstAttr() +The vertex attribute of the edge's destination vertex. + + + +abstract long +dstId() +The vertex id of the edge's destination vertex. + + + +abstract void +sendToDst(Amsg) +Sends a message to the destination vertex. + + + +abstract void +sendToSrc(Amsg) +Sends a message to the source vertex. 
+ + + +abstract VD +srcAttr() +The vertex attribute of the edge's source vertex. + + + +abstract long +srcId() +The vertex id of the edge's source vertex. + + + +EdgeTripletVD,ED +toEdgeTriplet() +Converts the edge and vertex properties into an EdgeTriplet for convenience. + + + +static VD,ED,Ascala.Somescala.Tuple5Object,Object,VD,VD,ED +unapply(EdgeContextVD,ED,Aedge) +Extractor mainly used for Graph#aggregateMessages*. + + + + + + + +Methods inherited from classObject +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait + + + + + + + + + + + + + + +Constructor Detail + + + + + +EdgeContext +publicEdgeContext() + + + + + + + + + +Method Detail + + + + + +unapply +public staticVD,ED,Ascala.Somescala.Tuple5Object,Object,VD,VD,EDunapply(EdgeContextVD,ED,Aedge) +Extractor mainly used for Graph#aggregateMessages*. + Example: + + val messages = graph.aggregateMessages( +case ctx EdgeContext(_, _, _, _, attr) = + ctx.sendToDst(attr) +, _ + _) + +Parameters:edge - (undocumented) +Returns:(undocumented) + + + + + + + +srcId +public abstractlongsrcId() +The vertex id of the edge's source vertex. + + + + + + + +dstId +public abstractlongdstId() +The vertex id of the edge's destination vertex. + + + + + + + +srcAttr +public abstractVDsrcAttr() +The vertex attribute of the edge's source vertex. + + + + + + + +dstAttr +public abstractVDdstAttr() +The vertex attribute of the edge's destination vertex. + + + + + + + +attr +public abstractEDattr() +The attribute associated with the edge. + + + + + + + + + +sendToSrc +public abstractvoidsendToSrc(Amsg) +Sends a message to the source vertex. + + + + + + + + + +sendToDst +public abstractvoidsendToDst(Amsg) +Sends a message to the destination vertex. + + + + + + + +toEdgeTriplet +publicEdgeTripletVD,EDtoEdgeTriplet() +Converts the edge and vertex properties into an EdgeTriplet for convenience. 
+ + + + + + + + + + + + + + + + +Overview +Package +Class +Tree +Deprecated +Index +Help + + + + +Prev Class +Next Class + + +Frames +No Frames + + +All Classes + + + + + + + +Summary: +Nested| +Field| +Constr| +Method + + +Detail: +Field| +Constr| +Method + + + + + + + + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/java/org/apache/spark/graphx/EdgeDirection.html --
[39/51] [partial] spark-website git commit: Add 1.6.3 doc.
http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/str.html -- diff --git a/site/docs/1.6.3/api/R/str.html b/site/docs/1.6.3/api/R/str.html new file mode 100644 index 000..170cdb4 --- /dev/null +++ b/site/docs/1.6.3/api/R/str.html @@ -0,0 +1,135 @@ + +R: Compactly display the structure of a dataset + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +str {SparkR}R Documentation + +Compactly display the structure of a dataset + +Description + +Display the structure of a DataFrame, including column names, column types, as well as a +a small sample of rows. + + + +Usage + + +## S4 method for signature 'DataFrame' +str(object) + + + +Arguments + + +object + +a DataFrame + + + + +See Also + +Other DataFrame functions: $, +$-, select, +select, +select,DataFrame,Column-method, +select,DataFrame,list-method, +selectExpr; DataFrame-class, +dataFrame, groupedData; +[, [, [[, +subset; agg, +agg, +count,GroupedData-method, +summarize, summarize; +arrange, arrange, +arrange, orderBy, +orderBy; as.data.frame, +as.data.frame,DataFrame-method; +attach, +attach,DataFrame-method; +cache; collect; +colnames, colnames, +colnames-, colnames-, +columns, names, +names-; coltypes, +coltypes, coltypes-, +coltypes-; columns, +dtypes, printSchema, +schema, schema; +count, nrow; +describe, describe, +describe, summary, +summary, +summary,PipelineModel-method; +dim; distinct, +unique; dropna, +dropna, fillna, +fillna, na.omit, +na.omit; dtypes; +except, except; +explain, explain; +filter, filter, +where, where; +first, first; +groupBy, groupBy, +group_by, group_by; +head; insertInto, +insertInto; intersect, +intersect; isLocal, +isLocal; join; +limit, limit; +merge, merge; +mutate, mutate, +transform, transform; +ncol; persist; 
+printSchema; rbind, +rbind, unionAll, +unionAll; registerTempTable, +registerTempTable; rename, +rename, withColumnRenamed, +withColumnRenamed; +repartition; sample, +sample, sample_frac, +sample_frac; +saveAsParquetFile, +saveAsParquetFile, +write.parquet, write.parquet; +saveAsTable, saveAsTable; +saveDF, saveDF, +write.df, write.df, +write.df; selectExpr; +showDF, showDF; +show, show, +show,GroupedData-method; +take; unpersist; +withColumn, withColumn; +write.json, write.json; +write.text, write.text + + + +Examples + +## Not run: +##D # Create a DataFrame from the Iris dataset +##D irisDF - createDataFrame(sqlContext, iris) +##D +##D # Show the structure of the DataFrame +##D str(irisDF) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/struct.html -- diff --git a/site/docs/1.6.3/api/R/struct.html b/site/docs/1.6.3/api/R/struct.html new file mode 100644 index 000..e4b81fb --- /dev/null +++ b/site/docs/1.6.3/api/R/struct.html @@ -0,0 +1,62 @@ + +R: struct + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +struct {SparkR}R Documentation + +struct + +Description + +Creates a new struct column that composes multiple input columns. + + + +Usage + + +## S4 method for signature 'characterOrColumn' +struct(x, ...) + +struct(x, ...) 
+ + + +See Also + +Other normal_funcs: abs; +bitwiseNOT, bitwiseNOT; +col, column, +column; expr, +expr; greatest, +greatest; ifelse; +is.nan, isnan, +isnan; least, +least; lit, +lit; nanvl, +nanvl; negate, +negate; randn, +randn, randn; +rand, rand, +rand; when + + + +Examples + +## Not run: +##D struct(df$c, df$d) +##D struct(col1, col2) +## End(Not run) + + + +[Package SparkR version 1.6.3 Index] + http://git-wip-us.apache.org/repos/asf/spark-website/blob/24d32b75/site/docs/1.6.3/api/R/structField.html -- diff --git a/site/docs/1.6.3/api/R/structField.html b/site/docs/1.6.3/api/R/structField.html new file mode 100644 index 000..9cb99a4 --- /dev/null +++ b/site/docs/1.6.3/api/R/structField.html @@ -0,0 +1,68 @@ + +R: structField + + + +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css;> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"> +https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"> +hljs.initHighlightingOnLoad(); + + +structField {SparkR}R Documentation + +structField + +Description + +Create a structField object that contains the metadata for
spark git commit: [SPARK-17183][SPARK-17983][SPARK-18101][SQL] put hive serde table schema to table properties like data source table
Repository: spark Updated Branches: refs/heads/branch-2.1 42386e796 -> d3b606690 [SPARK-17183][SPARK-17983][SPARK-18101][SQL] put hive serde table schema to table properties like data source table ## What changes were proposed in this pull request? For data source tables, we will put its table schema, partition columns, etc. to table properties, to work around some hive metastore issues, e.g. not case-preserving, bad decimal type support, etc. We should also do this for hive serde tables, to reduce the difference between hive serde tables and data source tables, e.g. column names should be case preserving. ## How was this patch tested? existing tests, and a new test in `HiveExternalCatalog` Author: Wenchen FanCloses #14750 from cloud-fan/minor1. (cherry picked from commit 95ec4e25bb65f37f80222ffe70a95993a9149f80) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d3b60669 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d3b60669 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d3b60669 Branch: refs/heads/branch-2.1 Commit: d3b6066900a16f5c4351ac9117d651fec9a84b51 Parents: 42386e7 Author: Wenchen Fan Authored: Sat Nov 5 00:58:50 2016 -0700 Committer: Reynold Xin Committed: Sat Nov 5 00:59:13 2016 -0700 -- .../sql/catalyst/catalog/ExternalCatalog.scala | 8 +- .../sql/catalyst/catalog/InMemoryCatalog.scala | 6 - .../org/apache/spark/sql/types/DataType.scala | 24 ++ .../catalyst/catalog/ExternalCatalogSuite.scala | 20 ++ .../org/apache/spark/sql/DataFrameWriter.scala | 10 +- .../spark/sql/execution/SparkSqlParser.scala| 4 +- .../spark/sql/execution/SparkStrategies.scala | 6 +- .../spark/sql/execution/command/ddl.scala | 4 +- .../spark/sql/execution/datasources/rules.scala | 5 +- .../spark/sql/hive/HiveExternalCatalog.scala| 218 ++- .../input1-2-d3aa54d5436b7b59ff5c7091b7ca6145 | 4 +- .../input2-1-e0efeda558cd0194f4764a5735147b16 | 4 +- 
.../input2-2-aa9ab0598e0cb7a12c719f9b3d98dbfd | 4 +- .../input2-4-235f92683416fab031e6e7490487b15b | 6 +- ...w_columns-2-b74990316ec4245fd8a7011e684b39da | 6 +- .../hive/PartitionedTablePerfStatsSuite.scala | 9 +- .../sql/hive/execution/SQLQuerySuite.scala | 4 +- 17 files changed, 245 insertions(+), 97 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d3b60669/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index a5e0252..14dd707 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.catalog -import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException} +import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression @@ -39,6 +39,12 @@ abstract class ExternalCatalog { } } + protected def requireTableExists(db: String, table: String): Unit = { +if (!tableExists(db, table)) { + throw new NoSuchTableException(db = db, table = table) +} + } + protected def requireFunctionExists(db: String, funcName: String): Unit = { if (!functionExists(db, funcName)) { throw new NoSuchFunctionException(db = db, func = funcName) http://git-wip-us.apache.org/repos/asf/spark/blob/d3b60669/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index ea675b7..bc39688 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -64,12 +64,6 @@ class InMemoryCatalog( catalog(db).tables(table).partitions.contains(spec) } - private def requireTableExists(db: String, table: String): Unit = { -if (!tableExists(db, table)) { - throw new
spark git commit: [SPARK-17183][SPARK-17983][SPARK-18101][SQL] put hive serde table schema to table properties like data source table
Repository: spark Updated Branches: refs/heads/master 6e2701815 -> 95ec4e25b [SPARK-17183][SPARK-17983][SPARK-18101][SQL] put hive serde table schema to table properties like data source table ## What changes were proposed in this pull request? For data source tables, we will put its table schema, partition columns, etc. to table properties, to work around some hive metastore issues, e.g. not case-preserving, bad decimal type support, etc. We should also do this for hive serde tables, to reduce the difference between hive serde tables and data source tables, e.g. column names should be case preserving. ## How was this patch tested? existing tests, and a new test in `HiveExternalCatalog` Author: Wenchen FanCloses #14750 from cloud-fan/minor1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/95ec4e25 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/95ec4e25 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/95ec4e25 Branch: refs/heads/master Commit: 95ec4e25bb65f37f80222ffe70a95993a9149f80 Parents: 6e27018 Author: Wenchen Fan Authored: Sat Nov 5 00:58:50 2016 -0700 Committer: Reynold Xin Committed: Sat Nov 5 00:58:50 2016 -0700 -- .../sql/catalyst/catalog/ExternalCatalog.scala | 8 +- .../sql/catalyst/catalog/InMemoryCatalog.scala | 6 - .../org/apache/spark/sql/types/DataType.scala | 24 ++ .../catalyst/catalog/ExternalCatalogSuite.scala | 20 ++ .../org/apache/spark/sql/DataFrameWriter.scala | 10 +- .../spark/sql/execution/SparkSqlParser.scala| 4 +- .../spark/sql/execution/SparkStrategies.scala | 6 +- .../spark/sql/execution/command/ddl.scala | 4 +- .../spark/sql/execution/datasources/rules.scala | 5 +- .../spark/sql/hive/HiveExternalCatalog.scala| 218 ++- .../input1-2-d3aa54d5436b7b59ff5c7091b7ca6145 | 4 +- .../input2-1-e0efeda558cd0194f4764a5735147b16 | 4 +- .../input2-2-aa9ab0598e0cb7a12c719f9b3d98dbfd | 4 +- .../input2-4-235f92683416fab031e6e7490487b15b | 6 +- 
...w_columns-2-b74990316ec4245fd8a7011e684b39da | 6 +- .../hive/PartitionedTablePerfStatsSuite.scala | 9 +- .../sql/hive/execution/SQLQuerySuite.scala | 4 +- 17 files changed, 245 insertions(+), 97 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/95ec4e25/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index a5e0252..14dd707 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.catalog -import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException} +import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression @@ -39,6 +39,12 @@ abstract class ExternalCatalog { } } + protected def requireTableExists(db: String, table: String): Unit = { +if (!tableExists(db, table)) { + throw new NoSuchTableException(db = db, table = table) +} + } + protected def requireFunctionExists(db: String, funcName: String): Unit = { if (!functionExists(db, funcName)) { throw new NoSuchFunctionException(db = db, func = funcName) http://git-wip-us.apache.org/repos/asf/spark/blob/95ec4e25/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index ea675b7..bc39688 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -64,12 +64,6 @@ class InMemoryCatalog( catalog(db).tables(table).partitions.contains(spec) } - private def requireTableExists(db: String, table: String): Unit = { -if (!tableExists(db, table)) { - throw new NoSuchTableException(db = db, table = table) -} - } - private def requireTableNotExists(db: String, table: String): Unit
spark git commit: [SPARK-18260] Make from_json null safe
Repository: spark Updated Branches: refs/heads/master 8a9ca1924 -> 6e2701815 [SPARK-18260] Make from_json null safe ## What changes were proposed in this pull request? `from_json` is currently not safe against `null` rows. This PR adds a fix and a regression test for it. ## How was this patch tested? Regression test Author: Burak YavuzCloses #15771 from brkyvz/json_fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e270181 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e270181 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e270181 Branch: refs/heads/master Commit: 6e2701815761d5870111cb56300e30d3059b39ed Parents: 8a9ca19 Author: Burak Yavuz Authored: Sat Nov 5 00:07:51 2016 -0700 Committer: Reynold Xin Committed: Sat Nov 5 00:07:51 2016 -0700 -- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 4 +++- .../sql/catalyst/expressions/JsonExpressionsSuite.scala | 8 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6e270181/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index e034735..89fe7c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -498,7 +498,9 @@ case class JsonToStruct(schema: StructType, options: Map[String, String], child: override def children: Seq[Expression] = child :: Nil override def eval(input: InternalRow): Any = { -try parser.parse(child.eval(input).toString).head catch { +val json = child.eval(input) +if (json == null) return null +try parser.parse(json.toString).head catch { case _: 
SparkSQLJsonProcessingException => null } } http://git-wip-us.apache.org/repos/asf/spark/blob/6e270181/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index f9db649..3bfa0bf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -344,6 +344,14 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { ) } + test("from_json null input column") { +val schema = StructType(StructField("a", IntegerType) :: Nil) +checkEvaluation( + JsonToStruct(schema, Map.empty, Literal(null)), + null +) + } + test("to_json") { val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(create_row(1), schema) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18260] Make from_json null safe
Repository: spark Updated Branches: refs/heads/branch-2.1 707630147 -> 42386e796 [SPARK-18260] Make from_json null safe ## What changes were proposed in this pull request? `from_json` is currently not safe against `null` rows. This PR adds a fix and a regression test for it. ## How was this patch tested? Regression test Author: Burak YavuzCloses #15771 from brkyvz/json_fix. (cherry picked from commit 6e2701815761d5870111cb56300e30d3059b39ed) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42386e79 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42386e79 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42386e79 Branch: refs/heads/branch-2.1 Commit: 42386e796f6519d22092fba88a8c42cba6511d7c Parents: 7076301 Author: Burak Yavuz Authored: Sat Nov 5 00:07:51 2016 -0700 Committer: Reynold Xin Committed: Sat Nov 5 00:08:00 2016 -0700 -- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 4 +++- .../sql/catalyst/expressions/JsonExpressionsSuite.scala | 8 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/42386e79/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index e034735..89fe7c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -498,7 +498,9 @@ case class JsonToStruct(schema: StructType, options: Map[String, String], child: override def children: Seq[Expression] = child :: Nil override def eval(input: InternalRow): Any = { -try parser.parse(child.eval(input).toString).head catch { +val json = 
child.eval(input) +if (json == null) return null +try parser.parse(json.toString).head catch { case _: SparkSQLJsonProcessingException => null } } http://git-wip-us.apache.org/repos/asf/spark/blob/42386e79/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index f9db649..3bfa0bf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -344,6 +344,14 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { ) } + test("from_json null input column") { +val schema = StructType(StructField("a", IntegerType) :: Nil) +checkEvaluation( + JsonToStruct(schema, Map.empty, Literal(null)), + null +) + } + test("to_json") { val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(create_row(1), schema) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17710][FOLLOW UP] Add comments to state why 'Utils.classForName' is not used
Repository: spark Updated Branches: refs/heads/branch-2.1 491db67a5 -> 707630147 [SPARK-17710][FOLLOW UP] Add comments to state why 'Utils.classForName' is not used ## What changes were proposed in this pull request? Add comments. ## How was this patch tested? Build passed. Author: Weiqing YangCloses #15776 from weiqingy/SPARK-17710. (cherry picked from commit 8a9ca1924792d1a7c733bdfd757996b3ade0d63d) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70763014 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70763014 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70763014 Branch: refs/heads/branch-2.1 Commit: 707630147e51114aa90f58f375df43bb2b5f7fb4 Parents: 491db67 Author: Weiqing Yang Authored: Fri Nov 4 23:44:46 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 23:44:53 2016 -0700 -- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/70763014/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 22c28fb..1de66af 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2539,6 +2539,8 @@ private[util] object CallerContext extends Logging { val callerContextSupported: Boolean = { SparkHadoopUtil.get.conf.getBoolean("hadoop.caller.context.enabled", false) && { try { +// `Utils.classForName` will make `ReplSuite` fail with `ClassCircularityError` in +// master Maven build, so do not use it before resolving SPARK-17714. 
// scalastyle:off classforname Class.forName("org.apache.hadoop.ipc.CallerContext") Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") @@ -2604,6 +2606,8 @@ private[spark] class CallerContext( def setCurrentContext(): Unit = { if (CallerContext.callerContextSupported) { try { +// `Utils.classForName` will make `ReplSuite` fail with `ClassCircularityError` in +// master Maven build, so do not use it before resolving SPARK-17714. // scalastyle:off classforname val callerContext = Class.forName("org.apache.hadoop.ipc.CallerContext") val builder = Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17710][FOLLOW UP] Add comments to state why 'Utils.classForName' is not used
Repository: spark Updated Branches: refs/heads/master 0f7c9e84e -> 8a9ca1924 [SPARK-17710][FOLLOW UP] Add comments to state why 'Utils.classForName' is not used ## What changes were proposed in this pull request? Add comments. ## How was this patch tested? Build passed. Author: Weiqing YangCloses #15776 from weiqingy/SPARK-17710. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a9ca192 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a9ca192 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a9ca192 Branch: refs/heads/master Commit: 8a9ca1924792d1a7c733bdfd757996b3ade0d63d Parents: 0f7c9e8 Author: Weiqing Yang Authored: Fri Nov 4 23:44:46 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 23:44:46 2016 -0700 -- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a9ca192/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 22c28fb..1de66af 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2539,6 +2539,8 @@ private[util] object CallerContext extends Logging { val callerContextSupported: Boolean = { SparkHadoopUtil.get.conf.getBoolean("hadoop.caller.context.enabled", false) && { try { +// `Utils.classForName` will make `ReplSuite` fail with `ClassCircularityError` in +// master Maven build, so do not use it before resolving SPARK-17714. 
// scalastyle:off classforname Class.forName("org.apache.hadoop.ipc.CallerContext") Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") @@ -2604,6 +2606,8 @@ private[spark] class CallerContext( def setCurrentContext(): Unit = { if (CallerContext.callerContextSupported) { try { +// `Utils.classForName` will make `ReplSuite` fail with `ClassCircularityError` in +// master Maven build, so do not use it before resolving SPARK-17714. // scalastyle:off classforname val callerContext = Class.forName("org.apache.hadoop.ipc.CallerContext") val builder = Class.forName("org.apache.hadoop.ipc.CallerContext$Builder") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18189] [SQL] [Followup] Move test from ReplSuite to prevent java.lang.ClassCircularityError
Repository: spark Updated Branches: refs/heads/branch-2.1 0a303a694 -> 491db67a5 [SPARK-18189] [SQL] [Followup] Move test from ReplSuite to prevent java.lang.ClassCircularityError closes #15774 (cherry picked from commit 0f7c9e84e0d00813bf56712097677add5657f19f) Signed-off-by: Reynold XinProject: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/491db67a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/491db67a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/491db67a Branch: refs/heads/branch-2.1 Commit: 491db67a5fd067ef5e767ac4a07144722302d95a Parents: 0a303a6 Author: Reynold Xin Authored: Fri Nov 4 23:34:29 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 23:35:04 2016 -0700 -- .../scala/org/apache/spark/repl/ReplSuite.scala| 17 - .../scala/org/apache/spark/sql/DatasetSuite.scala | 12 2 files changed, 12 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/491db67a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala -- diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 96d2dfc..9262e93 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -473,21 +473,4 @@ class ReplSuite extends SparkFunSuite { assertDoesNotContain("AssertionError", output) assertDoesNotContain("Exception", output) } - - test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { -val resultValue = 12345 -val output = runInterpreter("local", - s""" - |val keyValueGrouped = Seq((1, 2), (3, 4)).toDS().groupByKey(_._1) - |val mapGroups = keyValueGrouped.mapGroups((k, v) => (k, 1)) - |val broadcasted = sc.broadcast($resultValue) - | - |// Using broadcast triggers serialization issue in KeyValueGroupedDataset - |val dataset = mapGroups.map(_ => 
broadcasted.value) - |dataset.collect() - """.stripMargin) -assertDoesNotContain("error:", output) -assertDoesNotContain("Exception", output) -assertContains(s": Array[Int] = Array($resultValue, $resultValue)", output) - } } http://git-wip-us.apache.org/repos/asf/spark/blob/491db67a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 55f0487..6fa7b04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -923,6 +923,18 @@ class DatasetSuite extends QueryTest with SharedSQLContext { .groupByKey(_.a).flatMapGroups { case (x, iter) => List[Int]() }) } + test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { +val resultValue = 12345 +val keyValueGrouped = Seq((1, 2), (3, 4)).toDS().groupByKey(_._1) +val mapGroups = keyValueGrouped.mapGroups((k, v) => (k, 1)) +val broadcasted = spark.sparkContext.broadcast(resultValue) + +// Using broadcast triggers serialization issue in KeyValueGroupedDataset +val dataset = mapGroups.map(_ => broadcasted.value) + +assert(dataset.collect() sameElements Array(resultValue, resultValue)) + } + Seq(true, false).foreach { eager => def testCheckpointing(testName: String)(f: => Unit): Unit = { test(s"Dataset.checkpoint() - $testName (eager = $eager)") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18189] [SQL] [Followup] Move test from ReplSuite to prevent java.lang.ClassCircularityError
Repository: spark Updated Branches: refs/heads/master 0e3312ee7 -> 0f7c9e84e [SPARK-18189] [SQL] [Followup] Move test from ReplSuite to prevent java.lang.ClassCircularityError closes #15774 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f7c9e84 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f7c9e84 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f7c9e84 Branch: refs/heads/master Commit: 0f7c9e84e0d00813bf56712097677add5657f19f Parents: 0e3312e Author: Reynold XinAuthored: Fri Nov 4 23:34:29 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 23:34:29 2016 -0700 -- .../scala/org/apache/spark/repl/ReplSuite.scala| 17 - .../scala/org/apache/spark/sql/DatasetSuite.scala | 12 2 files changed, 12 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0f7c9e84/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala -- diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 96d2dfc..9262e93 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -473,21 +473,4 @@ class ReplSuite extends SparkFunSuite { assertDoesNotContain("AssertionError", output) assertDoesNotContain("Exception", output) } - - test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { -val resultValue = 12345 -val output = runInterpreter("local", - s""" - |val keyValueGrouped = Seq((1, 2), (3, 4)).toDS().groupByKey(_._1) - |val mapGroups = keyValueGrouped.mapGroups((k, v) => (k, 1)) - |val broadcasted = sc.broadcast($resultValue) - | - |// Using broadcast triggers serialization issue in KeyValueGroupedDataset - |val dataset = mapGroups.map(_ => broadcasted.value) - |dataset.collect() - """.stripMargin) -assertDoesNotContain("error:", output) 
-assertDoesNotContain("Exception", output) -assertContains(s": Array[Int] = Array($resultValue, $resultValue)", output) - } } http://git-wip-us.apache.org/repos/asf/spark/blob/0f7c9e84/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 55f0487..6fa7b04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -923,6 +923,18 @@ class DatasetSuite extends QueryTest with SharedSQLContext { .groupByKey(_.a).flatMapGroups { case (x, iter) => List[Int]() }) } + test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { +val resultValue = 12345 +val keyValueGrouped = Seq((1, 2), (3, 4)).toDS().groupByKey(_._1) +val mapGroups = keyValueGrouped.mapGroups((k, v) => (k, 1)) +val broadcasted = spark.sparkContext.broadcast(resultValue) + +// Using broadcast triggers serialization issue in KeyValueGroupedDataset +val dataset = mapGroups.map(_ => broadcasted.value) + +assert(dataset.collect() sameElements Array(resultValue, resultValue)) + } + Seq(true, false).foreach { eager => def testCheckpointing(testName: String)(f: => Unit): Unit = { test(s"Dataset.checkpoint() - $testName (eager = $eager)") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18189][SQL][FOLLOWUP] Move test from ReplSuite to prevent java.lang.ClassCircularityError
Repository: spark Updated Branches: refs/heads/branch-2.0 399597b04 -> 8b99e204a [SPARK-18189][SQL][FOLLOWUP] Move test from ReplSuite to prevent java.lang.ClassCircularityError ## What changes were proposed in this pull request? Move the test which is causing java.lang.ClassCircularityError from ReplSuite to DatasetSuite. ## How was this patch tested? > build/mvn -DskipTests -Phadoop-2.3 -Pyarn -Phive -Phive-thriftserver > -Pkinesis-asl -Pmesos clean package > build/mvn -Dtest=none -DwildcardSuites=org.apache.spark.repl.ReplSuite test Author: Ergin SeyfeCloses #15774 from seyfe/fix_replsuite_test_error_branch2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b99e204 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b99e204 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b99e204 Branch: refs/heads/branch-2.0 Commit: 8b99e204a9a056fd071f9bd75f3e0a29f90bccc0 Parents: 399597b Author: Ergin Seyfe Authored: Fri Nov 4 23:29:20 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 23:29:20 2016 -0700 -- .../scala/org/apache/spark/repl/ReplSuite.scala| 17 - .../scala/org/apache/spark/sql/DatasetSuite.scala | 13 + 2 files changed, 13 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8b99e204/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala -- diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 8deafe3..f7d7a4f 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -473,21 +473,4 @@ class ReplSuite extends SparkFunSuite { assertDoesNotContain("AssertionError", output) assertDoesNotContain("Exception", output) } - - test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { -val resultValue = 12345 -val output = 
runInterpreter("local", - s""" - |val keyValueGrouped = Seq((1, 2), (3, 4)).toDS().groupByKey(_._1) - |val mapGroups = keyValueGrouped.mapGroups((k, v) => (k, 1)) - |val broadcasted = sc.broadcast($resultValue) - | - |// Using broadcast triggers serialization issue in KeyValueGroupedDataset - |val dataset = mapGroups.map(_ => broadcasted.value) - |dataset.collect() - """.stripMargin) -assertDoesNotContain("error:", output) -assertDoesNotContain("Exception", output) -assertContains(s": Array[Int] = Array($resultValue, $resultValue)", output) - } } http://git-wip-us.apache.org/repos/asf/spark/blob/8b99e204/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index f897cfb..6113e5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -882,6 +882,19 @@ class DatasetSuite extends QueryTest with SharedSQLContext { df.withColumn("b", expr("0")).as[ClassData] .groupByKey(_.a).flatMapGroups { case (x, iter) => List[Int]() }) } + + // This is moved from ReplSuite to prevent java.lang.ClassCircularityError. + test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { +val resultValue = 12345 +val keyValueGrouped = Seq((1, 2), (3, 4)).toDS().groupByKey(_._1) +val mapGroups = keyValueGrouped.mapGroups((k, v) => (k, 1)) +val broadcasted = spark.sparkContext.broadcast(resultValue) + +// Using broadcast triggers serialization issue in KeyValueGroupedDataset +val dataset = mapGroups.map(_ => broadcasted.value) + +assert(dataset.collect() sameElements Array(resultValue, resultValue)) + } } case class Generic[T](id: T, value: Double) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-17337][SPARK-16804][SQL][BRANCH-2.0] Backport subquery related PRs
Repository: spark Updated Branches: refs/heads/branch-2.0 c864e8a80 -> 399597b04 [SPARK-17337][SPARK-16804][SQL][BRANCH-2.0] Backport subquery related PRs ## What changes were proposed in this pull request? This PR backports two subquery related PRs to branch-2.0: - https://github.com/apache/spark/pull/14411 - https://github.com/apache/spark/pull/15761 ## How was this patch tested? Added a tests to `SubquerySuite`. Author: Nattavut SutyanyongAuthor: Herman van Hovell Closes #15772 from hvanhovell/SPARK-17337-2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/399597b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/399597b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/399597b0 Branch: refs/heads/branch-2.0 Commit: 399597b04a83bbe3cc748c21446de0d808d08155 Parents: c864e8a Author: Herman van Hovell Authored: Fri Nov 4 15:54:58 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 15:54:58 2016 -0700 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 13 ++ .../sql/catalyst/optimizer/Optimizer.scala | 16 ++- .../catalyst/analysis/AnalysisErrorSuite.scala | 17 .../org/apache/spark/sql/SubquerySuite.scala| 44 4 files changed, 89 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/399597b0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 617f3e0..6332f92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1025,6 +1025,19 @@ class Analyzer( case e: Expand => failOnOuterReferenceInSubTree(e, "an EXPAND") e +case l : LocalLimit => + failOnOuterReferenceInSubTree(l, "a LIMIT") + l +// Since LIMIT is 
represented as GlobalLimit(, (LocalLimit (, child)) +// and we are walking bottom up, we will fail on LocalLimit before +// reaching GlobalLimit. +// The code below is just a safety net. +case g : GlobalLimit => + failOnOuterReferenceInSubTree(g, "a LIMIT") + g +case s : Sample => + failOnOuterReferenceInSubTree(s, "a TABLESAMPLE") + s case p => failOnOuterReference(p) p http://git-wip-us.apache.org/repos/asf/spark/blob/399597b0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 4c06038..f0992b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1020,7 +1020,7 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { // state and all the input rows processed before. In another word, the order of input rows // matters for non-deterministic expressions, while pushing down predicates changes the order. case filter @ Filter(condition, project @ Project(fields, grandChild)) - if fields.forall(_.deterministic) => + if fields.forall(_.deterministic) && canPushThroughCondition(grandChild, condition) => // Create a map of Aliases to their values from the child projection. // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> a + b). @@ -1161,6 +1161,20 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { filter } } + + /** + * Check if we can safely push a filter through a projection, by making sure that predicate + * subqueries in the condition do not contain the same attributes as the plan they are moved + * into. This can happen when the plan and predicate subquery have the same source. 
+ */ + private def canPushThroughCondition(plan: LogicalPlan, condition: Expression): Boolean = { +val attributes = plan.outputSet +val matched = condition.find { + case PredicateSubquery(p, _, _, _) => p.outputSet.intersect(attributes).nonEmpty + case _ => false +} +matched.isEmpty + } } /**
spark git commit: [SPARK-18197][CORE] Optimise AppendOnlyMap implementation
Repository: spark Updated Branches: refs/heads/branch-2.1 cfe76028b -> a2d7e25e7 [SPARK-18197][CORE] Optimise AppendOnlyMap implementation ## What changes were proposed in this pull request? This improvement works by using the fastest comparison test first and we observed a 1% throughput performance improvement on PageRank (HiBench large profile) with this change. We used tprof and before the change in AppendOnlyMap.changeValue (where the optimisation occurs) this method was being used for 8053 profiling ticks representing 0.72% of the overall application time. After this change we observed this method only occurring for 2786 ticks and for 0.25% of the overall time. ## How was this patch tested? Existing unit tests and for performance we used HiBench large, profiling with tprof and IBM Healthcenter. Author: Adam RobertsCloses #15714 from a-roberts/patch-9. (cherry picked from commit a42d738c5de08bd395a7c220c487146173c6c163) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2d7e25e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2d7e25e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2d7e25e Branch: refs/heads/branch-2.1 Commit: a2d7e25e7c85ce17c8ceac5e1806afe96d3acc14 Parents: cfe7602 Author: Adam Roberts Authored: Fri Nov 4 12:06:06 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 12:06:12 2016 -0700 -- .../org/apache/spark/util/collection/AppendOnlyMap.scala | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a2d7e25e/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala index 6b74a29..bcb95b4 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala +++ 
b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala @@ -140,16 +140,16 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k.equals(curKey)) { -val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) -data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] -return newValue - } else if (curKey.eq(null)) { + if (curKey.eq(null)) { val newValue = updateFunc(false, null.asInstanceOf[V]) data(2 * pos) = k data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] incrementSize() return newValue + } else if (k.eq(curKey) || k.equals(curKey)) { +val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) +data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] +return newValue } else { val delta = i pos = (pos + delta) & mask - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18197][CORE] Optimise AppendOnlyMap implementation
Repository: spark Updated Branches: refs/heads/master 14f235d56 -> a42d738c5 [SPARK-18197][CORE] Optimise AppendOnlyMap implementation ## What changes were proposed in this pull request? This improvement works by using the fastest comparison test first and we observed a 1% throughput performance improvement on PageRank (HiBench large profile) with this change. We used tprof and before the change in AppendOnlyMap.changeValue (where the optimisation occurs) this method was being used for 8053 profiling ticks representing 0.72% of the overall application time. After this change we observed this method only occurring for 2786 ticks and for 0.25% of the overall time. ## How was this patch tested? Existing unit tests and for performance we used HiBench large, profiling with tprof and IBM Healthcenter. Author: Adam RobertsCloses #15714 from a-roberts/patch-9. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a42d738c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a42d738c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a42d738c Branch: refs/heads/master Commit: a42d738c5de08bd395a7c220c487146173c6c163 Parents: 14f235d Author: Adam Roberts Authored: Fri Nov 4 12:06:06 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 12:06:06 2016 -0700 -- .../org/apache/spark/util/collection/AppendOnlyMap.scala | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a42d738c/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala index 6b74a29..bcb95b4 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala @@ -140,16 +140,16 @@ class AppendOnlyMap[K, 
V](initialCapacity: Int = 64) var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k.equals(curKey)) { -val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) -data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] -return newValue - } else if (curKey.eq(null)) { + if (curKey.eq(null)) { val newValue = updateFunc(false, null.asInstanceOf[V]) data(2 * pos) = k data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] incrementSize() return newValue + } else if (k.eq(curKey) || k.equals(curKey)) { +val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) +data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] +return newValue } else { val delta = i pos = (pos + delta) & mask - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Closing some stale/invalid pull requests
Repository: spark Updated Branches: refs/heads/master 27602c337 -> 14f235d56 Closing some stale/invalid pull requests Closes #15758 Closes #15753 Closes #12708 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14f235d5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14f235d5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14f235d5 Branch: refs/heads/master Commit: 14f235d5643bca75e270652c15154d86e57a7a70 Parents: 27602c3 Author: Reynold XinAuthored: Fri Nov 4 01:27:06 2016 -0700 Committer: Reynold Xin Committed: Fri Nov 4 01:27:06 2016 -0700 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18200][GRAPHX][FOLLOW-UP] Support zero as an initial capacity in OpenHashSet
Repository: spark Updated Branches: refs/heads/branch-2.0 dae1581d9 -> c864e8a80 [SPARK-18200][GRAPHX][FOLLOW-UP] Support zero as an initial capacity in OpenHashSet ## What changes were proposed in this pull request? This is a follow-up PR of #15741 in order to keep `nextPowerOf2` consistent. **Before** ``` nextPowerOf2(0) => 2 nextPowerOf2(1) => 1 nextPowerOf2(2) => 2 nextPowerOf2(3) => 4 nextPowerOf2(4) => 4 nextPowerOf2(5) => 8 ``` **After** ``` nextPowerOf2(0) => 1 nextPowerOf2(1) => 1 nextPowerOf2(2) => 2 nextPowerOf2(3) => 4 nextPowerOf2(4) => 4 nextPowerOf2(5) => 8 ``` ## How was this patch tested? N/A Author: Dongjoon HyunCloses #15754 from dongjoon-hyun/SPARK-18200-2. (cherry picked from commit 27602c33751cebf6cd173c0de103454608cf6625) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c864e8a8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c864e8a8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c864e8a8 Branch: refs/heads/branch-2.0 Commit: c864e8a8020f4890f1839766851e7f4917da5c70 Parents: dae1581 Author: Dongjoon Hyun Authored: Thu Nov 3 23:15:33 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 23:17:15 2016 -0700 -- .../main/scala/org/apache/spark/util/collection/OpenHashSet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c864e8a8/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala index 7a1be85..60f6f53 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala @@ -272,7 +272,7 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag]( private def nextPowerOf2(n: Int): Int = { if (n == 0) { - 2 + 
1 } else { val highBit = Integer.highestOneBit(n) if (highBit == n) n else highBit << 1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18200][GRAPHX][FOLLOW-UP] Support zero as an initial capacity in OpenHashSet
Repository: spark Updated Branches: refs/heads/branch-2.1 8e145a94b -> cfe76028b [SPARK-18200][GRAPHX][FOLLOW-UP] Support zero as an initial capacity in OpenHashSet ## What changes were proposed in this pull request? This is a follow-up PR of #15741 in order to keep `nextPowerOf2` consistent. **Before** ``` nextPowerOf2(0) => 2 nextPowerOf2(1) => 1 nextPowerOf2(2) => 2 nextPowerOf2(3) => 4 nextPowerOf2(4) => 4 nextPowerOf2(5) => 8 ``` **After** ``` nextPowerOf2(0) => 1 nextPowerOf2(1) => 1 nextPowerOf2(2) => 2 nextPowerOf2(3) => 4 nextPowerOf2(4) => 4 nextPowerOf2(5) => 8 ``` ## How was this patch tested? N/A Author: Dongjoon HyunCloses #15754 from dongjoon-hyun/SPARK-18200-2. (cherry picked from commit 27602c33751cebf6cd173c0de103454608cf6625) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cfe76028 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cfe76028 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cfe76028 Branch: refs/heads/branch-2.1 Commit: cfe76028bb116d72eab6601bff3b2a1856597370 Parents: 8e145a9 Author: Dongjoon Hyun Authored: Thu Nov 3 23:15:33 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 23:17:07 2016 -0700 -- .../main/scala/org/apache/spark/util/collection/OpenHashSet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cfe76028/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala index 7a1be85..60f6f53 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala @@ -272,7 +272,7 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag]( private def nextPowerOf2(n: Int): Int = { if (n == 0) { - 2 + 
1 } else { val highBit = Integer.highestOneBit(n) if (highBit == n) n else highBit << 1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18200][GRAPHX][FOLLOW-UP] Support zero as an initial capacity in OpenHashSet
Repository: spark Updated Branches: refs/heads/master a08463b1d -> 27602c337 [SPARK-18200][GRAPHX][FOLLOW-UP] Support zero as an initial capacity in OpenHashSet ## What changes were proposed in this pull request? This is a follow-up PR of #15741 in order to keep `nextPowerOf2` consistent. **Before** ``` nextPowerOf2(0) => 2 nextPowerOf2(1) => 1 nextPowerOf2(2) => 2 nextPowerOf2(3) => 4 nextPowerOf2(4) => 4 nextPowerOf2(5) => 8 ``` **After** ``` nextPowerOf2(0) => 1 nextPowerOf2(1) => 1 nextPowerOf2(2) => 2 nextPowerOf2(3) => 4 nextPowerOf2(4) => 4 nextPowerOf2(5) => 8 ``` ## How was this patch tested? N/A Author: Dongjoon HyunCloses #15754 from dongjoon-hyun/SPARK-18200-2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27602c33 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27602c33 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27602c33 Branch: refs/heads/master Commit: 27602c33751cebf6cd173c0de103454608cf6625 Parents: a08463b Author: Dongjoon Hyun Authored: Thu Nov 3 23:15:33 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 23:15:33 2016 -0700 -- .../main/scala/org/apache/spark/util/collection/OpenHashSet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/27602c33/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala index 7a1be85..60f6f53 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala @@ -272,7 +272,7 @@ class OpenHashSet[@specialized(Long, Int) T: ClassTag]( private def nextPowerOf2(n: Int): Int = { if (n == 0) { - 2 + 1 } else { val highBit = Integer.highestOneBit(n) if (highBit == n) n else highBit << 1 - To 
unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18259][SQL] Do not capture Throwable in QueryExecution
Repository: spark Updated Branches: refs/heads/branch-2.1 37550c492 -> 91d567150 [SPARK-18259][SQL] Do not capture Throwable in QueryExecution ## What changes were proposed in this pull request? `QueryExecution.toString` currently captures `java.lang.Throwable`s; this is far from a best practice and can lead to confusing situation or invalid application states. This PR fixes this by only capturing `AnalysisException`s. ## How was this patch tested? Added a `QueryExecutionSuite`. Author: Herman van HovellCloses #15760 from hvanhovell/SPARK-18259. (cherry picked from commit aa412c55e31e61419d3de57ef4b13e50f9b38af0) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/91d56715 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/91d56715 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/91d56715 Branch: refs/heads/branch-2.1 Commit: 91d567150b305d05acb8543da5cbf21df244352d Parents: 37550c4 Author: Herman van Hovell Authored: Thu Nov 3 21:59:59 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 22:00:23 2016 -0700 -- .../spark/sql/execution/QueryExecution.scala| 2 +- .../sql/execution/QueryExecutionSuite.scala | 50 2 files changed, 51 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/91d56715/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index cb45a6d..b3ef29f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -104,7 +104,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { ReuseSubquery(sparkSession.sessionState.conf)) protected def stringOrError[A](f: => A): String = -try f.toString 
catch { case e: Throwable => e.toString } +try f.toString catch { case e: AnalysisException => e.toString } /** http://git-wip-us.apache.org/repos/asf/spark/blob/91d56715/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala new file mode 100644 index 000..8bceab3 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.execution + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} +import org.apache.spark.sql.test.SharedSQLContext + +class QueryExecutionSuite extends SharedSQLContext { + test("toString() exception/error handling") { +val badRule = new SparkStrategy { + var mode: String = "" + override def apply(plan: LogicalPlan): Seq[SparkPlan] = mode.toLowerCase match { +case "exception" => throw new AnalysisException(mode) +case "error" => throw new Error(mode) +case _ => Nil + } +} +spark.experimental.extraStrategies = badRule :: Nil + +def qe: QueryExecution = new QueryExecution(spark, OneRowRelation) + +// Nothing! +badRule.mode = "" +assert(qe.toString.contains("OneRowRelation")) + +// Throw an AnalysisException - this should be captured. +badRule.mode = "exception" +assert(qe.toString.contains("org.apache.spark.sql.AnalysisException")) + +// Throw an Error - this should not be captured. +badRule.mode = "error" +val error
spark git commit: [SPARK-18259][SQL] Do not capture Throwable in QueryExecution
Repository: spark Updated Branches: refs/heads/master dc4c60098 -> aa412c55e [SPARK-18259][SQL] Do not capture Throwable in QueryExecution ## What changes were proposed in this pull request? `QueryExecution.toString` currently captures `java.lang.Throwable`s; this is far from a best practice and can lead to confusing situation or invalid application states. This PR fixes this by only capturing `AnalysisException`s. ## How was this patch tested? Added a `QueryExecutionSuite`. Author: Herman van HovellCloses #15760 from hvanhovell/SPARK-18259. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aa412c55 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aa412c55 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aa412c55 Branch: refs/heads/master Commit: aa412c55e31e61419d3de57ef4b13e50f9b38af0 Parents: dc4c600 Author: Herman van Hovell Authored: Thu Nov 3 21:59:59 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 21:59:59 2016 -0700 -- .../spark/sql/execution/QueryExecution.scala| 2 +- .../sql/execution/QueryExecutionSuite.scala | 50 2 files changed, 51 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aa412c55/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index cb45a6d..b3ef29f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -104,7 +104,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { ReuseSubquery(sparkSession.sessionState.conf)) protected def stringOrError[A](f: => A): String = -try f.toString catch { case e: Throwable => e.toString } +try f.toString catch { case e: AnalysisException => 
e.toString } /** http://git-wip-us.apache.org/repos/asf/spark/blob/aa412c55/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala new file mode 100644 index 000..8bceab3 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.execution + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} +import org.apache.spark.sql.test.SharedSQLContext + +class QueryExecutionSuite extends SharedSQLContext { + test("toString() exception/error handling") { +val badRule = new SparkStrategy { + var mode: String = "" + override def apply(plan: LogicalPlan): Seq[SparkPlan] = mode.toLowerCase match { +case "exception" => throw new AnalysisException(mode) +case "error" => throw new Error(mode) +case _ => Nil + } +} +spark.experimental.extraStrategies = badRule :: Nil + +def qe: QueryExecution = new QueryExecution(spark, OneRowRelation) + +// Nothing! +badRule.mode = "" +assert(qe.toString.contains("OneRowRelation")) + +// Throw an AnalysisException - this should be captured. +badRule.mode = "exception" +assert(qe.toString.contains("org.apache.spark.sql.AnalysisException")) + +// Throw an Error - this should not be captured. +badRule.mode = "error" +val error = intercept[Error](qe.toString) +assert(error.getMessage.contains("error")) + } +}
spark git commit: [SPARK-18138][DOCS] Document that Java 7, Python 2.6, Scala 2.10, Hadoop < 2.6 are deprecated in Spark 2.1.0
Repository: spark Updated Branches: refs/heads/branch-2.1 af60b1ebb -> 37550c492 [SPARK-18138][DOCS] Document that Java 7, Python 2.6, Scala 2.10, Hadoop < 2.6 are deprecated in Spark 2.1.0 ## What changes were proposed in this pull request? Document that Java 7, Python 2.6, Scala 2.10, Hadoop < 2.6 are deprecated in Spark 2.1.0. This does not actually implement any of the change in SPARK-18138, just peppers the documentation with notices about it. ## How was this patch tested? Doc build Author: Sean OwenCloses #15733 from srowen/SPARK-18138. (cherry picked from commit dc4c60098641cf64007e2f0e36378f000ad5f6b1) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/37550c49 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/37550c49 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/37550c49 Branch: refs/heads/branch-2.1 Commit: 37550c49218e1890f8adc10c9549a23dc072e21f Parents: af60b1e Author: Sean Owen Authored: Thu Nov 3 17:27:23 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 17:27:44 2016 -0700 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 12 docs/building-spark.md | 6 ++ docs/index.md | 4 docs/programming-guide.md | 4 python/pyspark/context.py | 4 5 files changed, 30 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/37550c49/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 63478c8..9f0f607 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -183,6 +183,8 @@ class SparkContext(config: SparkConf) extends Logging { // log out Spark Version in Spark driver log logInfo(s"Running Spark version $SPARK_VERSION") + warnDeprecatedVersions() + /* - * | Private variables. 
These variables keep the internal state of the context, and are| | not accessible by the outside world. They're mutable since we want to initialize all | @@ -346,6 +348,16 @@ class SparkContext(config: SparkConf) extends Logging { value } + private def warnDeprecatedVersions(): Unit = { +val javaVersion = System.getProperty("java.version").split("[+.\\-]+", 3) +if (javaVersion.length >= 2 && javaVersion(1).toInt == 7) { + logWarning("Support for Java 7 is deprecated as of Spark 2.0.0") +} +if (scala.util.Properties.releaseVersion.exists(_.startsWith("2.10"))) { + logWarning("Support for Scala 2.10 is deprecated as of Spark 2.1.0") +} + } + /** Control our logLevel. This overrides any user-defined log settings. * @param logLevel The desired log level as a string. * Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN http://git-wip-us.apache.org/repos/asf/spark/blob/37550c49/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index ebe46a4..2b404bd 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -13,6 +13,7 @@ redirect_from: "building-with-maven.html" The Maven-based build is the build of reference for Apache Spark. Building Spark using Maven requires Maven 3.3.9 or newer and Java 7+. +Note that support for Java 7 is deprecated as of Spark 2.0.0 and may be removed in Spark 2.2.0. ### Setting up Maven's Memory Usage @@ -79,6 +80,9 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro +Note that support for versions of Hadoop before 2.6 are deprecated as of Spark 2.1.0 and may be +removed in Spark 2.2.0. + You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later. 
@@ -129,6 +133,8 @@ To produce a Spark package compiled with Scala 2.10, use the `-Dscala-2.10` prop ./dev/change-scala-version.sh 2.10 ./build/mvn -Pyarn -Phadoop-2.4 -Dscala-2.10 -DskipTests clean package + +Note that support for Scala 2.10 is deprecated as of Spark 2.1.0 and may be removed in Spark 2.2.0. ## Building submodules individually http://git-wip-us.apache.org/repos/asf/spark/blob/37550c49/docs/index.md
spark git commit: [SPARK-18138][DOCS] Document that Java 7, Python 2.6, Scala 2.10, Hadoop < 2.6 are deprecated in Spark 2.1.0
Repository: spark Updated Branches: refs/heads/master f22954ad4 -> dc4c60098 [SPARK-18138][DOCS] Document that Java 7, Python 2.6, Scala 2.10, Hadoop < 2.6 are deprecated in Spark 2.1.0 ## What changes were proposed in this pull request? Document that Java 7, Python 2.6, Scala 2.10, Hadoop < 2.6 are deprecated in Spark 2.1.0. This does not actually implement any of the change in SPARK-18138, just peppers the documentation with notices about it. ## How was this patch tested? Doc build Author: Sean OwenCloses #15733 from srowen/SPARK-18138. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dc4c6009 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dc4c6009 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dc4c6009 Branch: refs/heads/master Commit: dc4c60098641cf64007e2f0e36378f000ad5f6b1 Parents: f22954a Author: Sean Owen Authored: Thu Nov 3 17:27:23 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 17:27:23 2016 -0700 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 12 docs/building-spark.md | 6 ++ docs/index.md | 4 docs/programming-guide.md | 4 python/pyspark/context.py | 4 5 files changed, 30 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c6009/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 63478c8..9f0f607 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -183,6 +183,8 @@ class SparkContext(config: SparkConf) extends Logging { // log out Spark Version in Spark driver log logInfo(s"Running Spark version $SPARK_VERSION") + warnDeprecatedVersions() + /* - * | Private variables. These variables keep the internal state of the context, and are| | not accessible by the outside world. 
They're mutable since we want to initialize all | @@ -346,6 +348,16 @@ class SparkContext(config: SparkConf) extends Logging { value } + private def warnDeprecatedVersions(): Unit = { +val javaVersion = System.getProperty("java.version").split("[+.\\-]+", 3) +if (javaVersion.length >= 2 && javaVersion(1).toInt == 7) { + logWarning("Support for Java 7 is deprecated as of Spark 2.0.0") +} +if (scala.util.Properties.releaseVersion.exists(_.startsWith("2.10"))) { + logWarning("Support for Scala 2.10 is deprecated as of Spark 2.1.0") +} + } + /** Control our logLevel. This overrides any user-defined log settings. * @param logLevel The desired log level as a string. * Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c6009/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index ebe46a4..2b404bd 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -13,6 +13,7 @@ redirect_from: "building-with-maven.html" The Maven-based build is the build of reference for Apache Spark. Building Spark using Maven requires Maven 3.3.9 or newer and Java 7+. +Note that support for Java 7 is deprecated as of Spark 2.0.0 and may be removed in Spark 2.2.0. ### Setting up Maven's Memory Usage @@ -79,6 +80,9 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro +Note that support for versions of Hadoop before 2.6 are deprecated as of Spark 2.1.0 and may be +removed in Spark 2.2.0. + You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later. 
@@ -129,6 +133,8 @@ To produce a Spark package compiled with Scala 2.10, use the `-Dscala-2.10` prop ./dev/change-scala-version.sh 2.10 ./build/mvn -Pyarn -Phadoop-2.4 -Dscala-2.10 -DskipTests clean package + +Note that support for Scala 2.10 is deprecated as of Spark 2.1.0 and may be removed in Spark 2.2.0. ## Building submodules individually http://git-wip-us.apache.org/repos/asf/spark/blob/dc4c6009/docs/index.md -- diff --git a/docs/index.md b/docs/index.md index a7a92f6..fe51439 100644 --- a/docs/index.md
spark git commit: [SPARK-18257][SS] Improve error reporting for FileStressSuite
Repository: spark Updated Branches: refs/heads/branch-2.1 2daca62cd -> af60b1ebb [SPARK-18257][SS] Improve error reporting for FileStressSuite ## What changes were proposed in this pull request? This patch improves error reporting for FileStressSuite, when there is an error in Spark itself (not user code). This works by simply tightening the exception verification, and gets rid of the unnecessary thread for starting the stream. Also renamed the class FileStreamStressSuite to make it more obvious it is a streaming suite. ## How was this patch tested? This is a test only change and I manually verified error reporting by injecting some bug in the addBatch code for FileStreamSink. Author: Reynold Xin <r...@databricks.com> Closes #15757 from rxin/SPARK-18257. (cherry picked from commit f22954ad49bf5a32c7b6d8487cd38ffe0da904ca) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/af60b1eb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/af60b1eb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/af60b1eb Branch: refs/heads/branch-2.1 Commit: af60b1ebbf5cb91dc724aad9d3d7476ce9085ac9 Parents: 2daca62 Author: Reynold Xin <r...@databricks.com> Authored: Thu Nov 3 15:30:45 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Thu Nov 3 15:30:55 2016 -0700 -- .../sql/streaming/FileStreamStressSuite.scala | 156 +++ .../spark/sql/streaming/FileStressSuite.scala | 153 -- 2 files changed, 156 insertions(+), 153 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/af60b1eb/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala new file mode 100644 index 000..28412ea --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import java.io.File +import java.util.UUID + +import scala.util.Random +import scala.util.control.NonFatal + +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.util.Utils + +/** + * A stress test for streaming queries that read and write files. This test consists of + * two threads: + * - one that writes out `numRecords` distinct integers to files of random sizes (the total + *number of records is fixed but each files size / creation time is random). + * - another that continually restarts a buggy streaming query (i.e. fails with 5% probability on + *any partition). + * + * At the end, the resulting files are loaded and the answer is checked. + */ +class FileStreamStressSuite extends StreamTest { + import testImplicits._ + + // Error message thrown in the streaming job for testing recovery. + private val injectedErrorMsg = "test suite injected failure!" 
+ + testQuietly("fault tolerance stress test - unpartitioned output") { +stressTest(partitionWrites = false) + } + + testQuietly("fault tolerance stress test - partitioned output") { +stressTest(partitionWrites = true) + } + + def stressTest(partitionWrites: Boolean): Unit = { +val numRecords = 1 +val inputDir = Utils.createTempDir(namePrefix = "stream.input").getCanonicalPath +val stagingDir = Utils.createTempDir(namePrefix = "stream.staging").getCanonicalPath +val outputDir = Utils.createTempDir(namePrefix = "stream.output").getCanonicalPath +val checkpoint = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath + +@volatile +var continue = true +@volatile +var stream: StreamingQuery = n
spark git commit: [SPARK-18257][SS] Improve error reporting for FileStressSuite
Repository: spark Updated Branches: refs/heads/master e89202523 -> f22954ad4 [SPARK-18257][SS] Improve error reporting for FileStressSuite ## What changes were proposed in this pull request? This patch improves error reporting for FileStressSuite, when there is an error in Spark itself (not user code). This works by simply tightening the exception verification, and gets rid of the unnecessary thread for starting the stream. Also renamed the class FileStreamStressSuite to make it more obvious it is a streaming suite. ## How was this patch tested? This is a test only change and I manually verified error reporting by injecting some bug in the addBatch code for FileStreamSink. Author: Reynold Xin <r...@databricks.com> Closes #15757 from rxin/SPARK-18257. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f22954ad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f22954ad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f22954ad Branch: refs/heads/master Commit: f22954ad49bf5a32c7b6d8487cd38ffe0da904ca Parents: e892025 Author: Reynold Xin <r...@databricks.com> Authored: Thu Nov 3 15:30:45 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Thu Nov 3 15:30:45 2016 -0700 -- .../sql/streaming/FileStreamStressSuite.scala | 156 +++ .../spark/sql/streaming/FileStressSuite.scala | 153 -- 2 files changed, 156 insertions(+), 153 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f22954ad/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala new file mode 100644 index 000..28412ea --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import java.io.File +import java.util.UUID + +import scala.util.Random +import scala.util.control.NonFatal + +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.util.Utils + +/** + * A stress test for streaming queries that read and write files. This test consists of + * two threads: + * - one that writes out `numRecords` distinct integers to files of random sizes (the total + *number of records is fixed but each files size / creation time is random). + * - another that continually restarts a buggy streaming query (i.e. fails with 5% probability on + *any partition). + * + * At the end, the resulting files are loaded and the answer is checked. + */ +class FileStreamStressSuite extends StreamTest { + import testImplicits._ + + // Error message thrown in the streaming job for testing recovery. + private val injectedErrorMsg = "test suite injected failure!" 
+ + testQuietly("fault tolerance stress test - unpartitioned output") { +stressTest(partitionWrites = false) + } + + testQuietly("fault tolerance stress test - partitioned output") { +stressTest(partitionWrites = true) + } + + def stressTest(partitionWrites: Boolean): Unit = { +val numRecords = 1 +val inputDir = Utils.createTempDir(namePrefix = "stream.input").getCanonicalPath +val stagingDir = Utils.createTempDir(namePrefix = "stream.staging").getCanonicalPath +val outputDir = Utils.createTempDir(namePrefix = "stream.output").getCanonicalPath +val checkpoint = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath + +@volatile +var continue = true +@volatile +var stream: StreamingQuery = null + +val writer = new Thread("stream writer") { + override def run(): Unit = { +var i = numRecords +while
spark git commit: [SPARK-18237][HIVE] hive.exec.stagingdir have no effect
Repository: spark Updated Branches: refs/heads/master b17057c0a -> 16293311c [SPARK-18237][HIVE] hive.exec.stagingdir have no effect hive.exec.stagingdir have no effect in spark2.0.1，Hive confs in hive-site.xml will be loaded in `hadoopConf`, so we should use `hadoopConf` in `InsertIntoHiveTable` instead of `SessionState.conf` Author: 福星 Closes #15744 from ClassNotFoundExp/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/16293311 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/16293311 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/16293311 Branch: refs/heads/master Commit: 16293311cdb25a62733a9aae4355659b971a3ce1 Parents: b17057c Author: 福星 Authored: Thu Nov 3 12:02:01 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 12:02:01 2016 -0700 -- .../apache/spark/sql/hive/execution/InsertIntoHiveTable.scala| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/16293311/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 15be12c..e333fc7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -76,7 +76,8 @@ case class InsertIntoHiveTable( def output: Seq[Attribute] = Seq.empty - val stagingDir = sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") + val hadoopConf = sessionState.newHadoopConf() + val stagingDir = hadoopConf.get("hive.exec.stagingdir", ".hive-staging") private def executionId: String = { val rand: Random = new Random @@ -163,7 +164,6 @@ case class InsertIntoHiveTable( // instances within the closure, since Serializer is 
not serializable while TableDesc is. val tableDesc = table.tableDesc val tableLocation = table.hiveQlTable.getDataLocation -val hadoopConf = sessionState.newHadoopConf() val tmpLocation = getExternalTmpPath(tableLocation, hadoopConf) val fileSinkConf = new FileSinkDesc(tmpLocation.toString, tableDesc, false) val isCompressed = hadoopConf.get("hive.exec.compress.output", "false").toBoolean - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18237][HIVE] hive.exec.stagingdir have no effect
Repository: spark Updated Branches: refs/heads/branch-2.1 4f91630c8 -> 3e139e239 [SPARK-18237][HIVE] hive.exec.stagingdir have no effect hive.exec.stagingdir have no effect in spark2.0.1，Hive confs in hive-site.xml will be loaded in `hadoopConf`, so we should use `hadoopConf` in `InsertIntoHiveTable` instead of `SessionState.conf` Author: 福星 Closes #15744 from ClassNotFoundExp/master. (cherry picked from commit 16293311cdb25a62733a9aae4355659b971a3ce1) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e139e23 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e139e23 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e139e23 Branch: refs/heads/branch-2.1 Commit: 3e139e2390085cfb42f7136f150b0fa08c14eb61 Parents: 4f91630 Author: 福星 Authored: Thu Nov 3 12:02:01 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 12:02:08 2016 -0700 -- .../apache/spark/sql/hive/execution/InsertIntoHiveTable.scala| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e139e23/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 15be12c..e333fc7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -76,7 +76,8 @@ case class InsertIntoHiveTable( def output: Seq[Attribute] = Seq.empty - val stagingDir = sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") + val hadoopConf = sessionState.newHadoopConf() + val stagingDir = hadoopConf.get("hive.exec.stagingdir", ".hive-staging") private def executionId: String = { val rand: Random = new Random 
@@ -163,7 +164,6 @@ case class InsertIntoHiveTable( // instances within the closure, since Serializer is not serializable while TableDesc is. val tableDesc = table.tableDesc val tableLocation = table.hiveQlTable.getDataLocation -val hadoopConf = sessionState.newHadoopConf() val tmpLocation = getExternalTmpPath(tableLocation, hadoopConf) val fileSinkConf = new FileSinkDesc(tmpLocation.toString, tableDesc, false) val isCompressed = hadoopConf.get("hive.exec.compress.output", "false").toBoolean - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18244][SQL] Rename partitionProviderIsHive -> tracksPartitionsInCatalog
Repository: spark Updated Branches: refs/heads/branch-2.1 c2876bfbf -> 4f91630c8 [SPARK-18244][SQL] Rename partitionProviderIsHive -> tracksPartitionsInCatalog ## What changes were proposed in this pull request? This patch renames partitionProviderIsHive to tracksPartitionsInCatalog, as the old name was too Hive specific. ## How was this patch tested? Should be covered by existing tests. Author: Reynold Xin <r...@databricks.com> Closes #15750 from rxin/SPARK-18244. (cherry picked from commit b17057c0a69b9c56e503483d97f5dc209eef0884) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f91630c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f91630c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f91630c Branch: refs/heads/branch-2.1 Commit: 4f91630c8100ee3a6fd168bc4247ca6fadd0a736 Parents: c2876bf Author: Reynold Xin <r...@databricks.com> Authored: Thu Nov 3 11:48:05 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Thu Nov 3 11:48:17 2016 -0700 -- .../spark/sql/catalyst/catalog/interface.scala | 9 + .../sql/catalyst/trees/TreeNodeSuite.scala | 2 +- .../command/createDataSourceTables.scala| 2 +- .../spark/sql/execution/command/ddl.scala | 4 ++-- .../spark/sql/execution/command/tables.scala| 2 +- .../sql/execution/datasources/DataSource.scala | 2 +- .../datasources/DataSourceStrategy.scala| 7 --- .../InsertIntoHadoopFsRelationCommand.scala | 6 +- .../spark/sql/execution/command/DDLSuite.scala | 2 +- .../spark/sql/hive/HiveExternalCatalog.scala| 21 10 files changed, 30 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f91630c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala 
index 7c3bec8..34748a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -138,8 +138,9 @@ case class BucketSpec( * Can be None if this table is a View, should be "hive" for hive serde tables. * @param unsupportedFeatures is a list of string descriptions of features that are used by the *underlying table but not supported by Spark SQL yet. - * @param partitionProviderIsHive whether this table's partition metadata is stored in the Hive - *metastore. + * @param tracksPartitionsInCatalog whether this table's partition metadata is stored in the + * catalog. If false, it is inferred automatically based on file + * structure. */ case class CatalogTable( identifier: TableIdentifier, @@ -158,7 +159,7 @@ case class CatalogTable( viewText: Option[String] = None, comment: Option[String] = None, unsupportedFeatures: Seq[String] = Seq.empty, -partitionProviderIsHive: Boolean = false) { +tracksPartitionsInCatalog: Boolean = false) { /** schema of this table's partition columns */ def partitionSchema: StructType = StructType(schema.filter { @@ -217,7 +218,7 @@ case class CatalogTable( if (properties.nonEmpty) s"Properties: $tableProperties" else "", if (stats.isDefined) s"Statistics: ${stats.get.simpleString}" else "", s"$storage", -if (partitionProviderIsHive) "Partition Provider: Hive" else "") +if (tracksPartitionsInCatalog) "Partition Provider: Catalog" else "") output.filter(_.nonEmpty).mkString("CatalogTable(\n\t", "\n\t", ")") } http://git-wip-us.apache.org/repos/asf/spark/blob/4f91630c/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 3eff12f..af1eaa1 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -489,7 +489,7 @@ class TreeNodeSuite extends SparkFunSuite { "