[2/2] spark git commit: Preparing development version 2.0.1-SNAPSHOT
Preparing development version 2.0.1-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3ebecbb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3ebecbb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3ebecbb

Branch: refs/heads/branch-2.0
Commit: b3ebecbb7a5f2ce55ba0392bc59b26780ff69e13
Parents: 48d1fa3
Author: Patrick Wendell
Authored: Wed Jul 13 22:32:55 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:32:55 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 5f546bb..507ddc7 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 2eaa810..bc3b0fe 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index f068d9d..2fb5835 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index fd22188..07d9f1c 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index a17aba5..5e02efd 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 0bd8846..e7fc6a2 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
[1/2] spark git commit: Preparing Spark release v2.0.0-rc3
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f6eda6b30 -> b3ebecbb7

Preparing Spark release v2.0.0-rc3

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/48d1fa3e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/48d1fa3e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/48d1fa3e

Branch: refs/heads/branch-2.0
Commit: 48d1fa3e736b2ffbb660839735cbb8867f00fee2
Parents: f6eda6b
Author: Patrick Wendell
Authored: Wed Jul 13 22:32:45 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:32:45 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 507ddc7..5f546bb 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index bc3b0fe..2eaa810 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 2fb5835..f068d9d 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 07d9f1c..fd22188 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 5e02efd..a17aba5 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index e7fc6a2..0bd8846 100644
---
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.0.0-rc3 [created] 48d1fa3e7
spark git commit: [SPARK-16503] SparkSession should provide Spark version
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 5244f86cf -> f6eda6b30

[SPARK-16503] SparkSession should provide Spark version

## What changes were proposed in this pull request?

This patch enables SparkSession to provide the Spark version.

## How was this patch tested?

Manual test:

```
scala> sc.version
res0: String = 2.1.0-SNAPSHOT

scala> spark.version
res1: String = 2.1.0-SNAPSHOT
```

```
>>> sc.version
u'2.1.0-SNAPSHOT'
>>> spark.version
u'2.1.0-SNAPSHOT'
```

Author: Liwei Lin

Closes #14165 from lw-lin/add-version.

(cherry picked from commit 39c836e976fcae51568bed5ebab28e148383b5d4)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f6eda6b3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f6eda6b3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f6eda6b3

Branch: refs/heads/branch-2.0
Commit: f6eda6b3020a5be0ff898111bf84c71e36e9433b
Parents: 5244f86
Author: Liwei Lin
Authored: Wed Jul 13 22:30:46 2016 -0700
Committer: Reynold Xin
Committed: Wed Jul 13 22:30:52 2016 -0700

----------------------------------------------------------------------
 python/pyspark/sql/session.py                               | 6 ++++++
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala  | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/f6eda6b3/python/pyspark/sql/session.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index a360fbe..594f937 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -234,6 +234,12 @@ class SparkSession(object):

     @property
     @since(2.0)
+    def version(self):
+        """The version of Spark on which this application is running."""
+        return self._jsparkSession.version()
+
+    @property
+    @since(2.0)
     def conf(self):
         """Runtime configuration interface for Spark.

http://git-wip-us.apache.org/repos/asf/spark/blob/f6eda6b3/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index a3fd39d..df0950d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -25,7 +25,7 @@ import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
 import scala.util.control.NonFatal

-import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext}
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
@@ -79,6 +79,13 @@ class SparkSession private(

   sparkContext.assertNotStopped()

+  /**
+   * The version of Spark on which this application is running.
+   *
+   * @since 2.0.0
+   */
+  def version: String = SPARK_VERSION
+
   /* ----------------------- *
    |  Session-related state  |
    * ----------------------- */
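For readers wanting to try the new property, a minimal PySpark sketch (the version gate below is illustrative, not part of the patch):

```python
# Illustrative use of the new `version` property; the feature gate below is
# made up for demonstration and is not from the commit.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

print(spark.version)  # same string as sc.version, e.g. u'2.1.0-SNAPSHOT'

# Gate application behavior on the running Spark version.
major, minor = spark.version.split(".")[:2]
if (int(major), int(minor)) >= (2, 0):
    print("SparkSession-based APIs are available")
```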
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.0.0-rc3 [deleted] 47eb9a621
[2/2] spark git commit: Preparing development version 2.0.1-SNAPSHOT
Preparing development version 2.0.1-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5244f86c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5244f86c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5244f86c

Branch: refs/heads/branch-2.0
Commit: 5244f86cfc8532587b70cf1cc20b0684583b9c23
Parents: 47eb9a6
Author: Patrick Wendell
Authored: Wed Jul 13 22:27:15 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:27:15 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 5f546bb..507ddc7 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 2eaa810..bc3b0fe 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index f068d9d..2fb5835 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index fd22188..07d9f1c 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index a17aba5..5e02efd 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 0bd8846..e7fc6a2 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.0.0-rc3 [created] 47eb9a621
[1/2] spark git commit: Preparing Spark release v2.0.0-rc3
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 abb802359 -> 5244f86cf

Preparing Spark release v2.0.0-rc3

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47eb9a62
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47eb9a62
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47eb9a62

Branch: refs/heads/branch-2.0
Commit: 47eb9a621e1ca37ecd31fa03f8bb42b83c366c01
Parents: abb8023
Author: Patrick Wendell
Authored: Wed Jul 13 22:27:07 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:27:07 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 507ddc7..5f546bb 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index bc3b0fe..2eaa810 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 2fb5835..f068d9d 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 07d9f1c..fd22188 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 5e02efd..a17aba5 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index e7fc6a2..0bd8846 100644
---
spark git commit: [SPARK-16536][SQL][PYSPARK][MINOR] Expose `sql` in PySpark Shell
Repository: spark
Updated Branches:
  refs/heads/master a5f51e216 -> 9c530576a

[SPARK-16536][SQL][PYSPARK][MINOR] Expose `sql` in PySpark Shell

## What changes were proposed in this pull request?

This PR exposes `sql` in the PySpark shell, like the Scala/R shells, for consistency.

**Background**
 * Scala

```scala
scala> sql("select 1 a")
res0: org.apache.spark.sql.DataFrame = [a: int]
```

 * R

```r
> sql("select 1")
SparkDataFrame[1:int]
```

**Before**
 * Python

```python
>>> sql("select 1 a")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'sql' is not defined
```

**After**
 * Python

```python
>>> sql("select 1 a")
DataFrame[a: int]
```

## How was this patch tested?

Manual.

Author: Dongjoon Hyun

Closes #14190 from dongjoon-hyun/SPARK-16536.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9c530576
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9c530576
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9c530576

Branch: refs/heads/master
Commit: 9c530576a44cbeb956db94e7fdd1fad50bd62973
Parents: a5f51e2
Author: Dongjoon Hyun
Authored: Wed Jul 13 22:24:26 2016 -0700
Committer: Reynold Xin
Committed: Wed Jul 13 22:24:26 2016 -0700

----------------------------------------------------------------------
 python/pyspark/shell.py | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/9c530576/python/pyspark/shell.py
----------------------------------------------------------------------
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index ac5ce87..c1917d2 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -49,6 +49,7 @@ except TypeError:

 spark = SparkSession.builder.getOrCreate()
 sc = spark.sparkContext
+sql = spark.sql

 atexit.register(lambda: sc.stop())

 # for compatibility
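Standalone scripts run through `spark-submit` do not get the shell's predefined globals, so the equivalent binding has to be set up by hand; a minimal sketch (script contents are illustrative):

```python
# Equivalent setup in a standalone script; the globals (spark, sc, sql) are
# only predefined in the interactive pyspark shell.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext
sql = spark.sql  # the same one-line binding this patch adds to pyspark/shell.py

sql("select 1 a").show()
# +---+
# |  a|
# +---+
# |  1|
# +---+
```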
spark git commit: [SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 550d0e7dc -> abb802359

[SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes

## What changes were proposed in this pull request?

Fixing issues found during 2.0 API checks:
* GeneralizedLinearRegressionModel: linkObj, familyObj, familyAndLink should not be exposed
* sqlDataTypes: name does not follow conventions. Do we need to expose it?
* Evaluator: inconsistent doc between evaluate and isLargerBetter
* MinMaxScaler: math rendering --> hard to make it great, but I'll change it a little
* GeneralizedLinearRegressionSummary: aic doc is incorrect --> will change to use more common name

## How was this patch tested?

Existing unit tests. Docs generated locally. (MinMaxScaler is improved a tiny bit.)

Author: Joseph K. Bradley

Closes #14187 from jkbradley/final-api-check-2.0.

(cherry picked from commit a5f51e21627c1bcfc62829a3a962707abf41a452)
Signed-off-by: Joseph K. Bradley

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/abb80235
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/abb80235
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/abb80235

Branch: refs/heads/branch-2.0
Commit: abb8023599df4a9b5133accf547607eda5ca45d2
Parents: 550d0e7
Author: Joseph K. Bradley
Authored: Wed Jul 13 15:40:44 2016 -0700
Committer: Joseph K. Bradley
Committed: Wed Jul 13 15:40:53 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/ml/evaluation/Evaluator.scala  |  7 ++--
 .../apache/spark/ml/feature/MinMaxScaler.scala  |  4 +--
 .../apache/spark/ml/linalg/SQLDataTypes.scala   | 36 ++++++++++++++++++++
 .../org/apache/spark/ml/linalg/dataTypes.scala  | 35 -------------------
 .../GeneralizedLinearRegression.scala           | 10 +++---
 .../spark/ml/linalg/JavaSQLDataTypesSuite.java  |  2 +-
 .../spark/ml/linalg/SQLDataTypesSuite.scala     |  4 +--
 7 files changed, 51 insertions(+), 47 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
index 5f765c0..dfbc3e5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.Dataset
 abstract class Evaluator extends Params {

   /**
-   * Evaluates model output and returns a scalar metric (larger is better).
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
    *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @param paramMap parameter map that specifies the input columns and output metrics
@@ -42,7 +43,9 @@ abstract class Evaluator extends Params {
   }

   /**
-   * Evaluates the output.
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
+   *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @return metric
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 7b03f0c..9ed8d83 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -78,9 +78,9 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
  * statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
  * feature E is calculated as,
  *
- * Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min
+ * `Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min`
  *
- * For the case E_{max} == E_{min}, Rescaled(e_i) = 0.5 * (max + min)
+ * For the case `E_{max} == E_{min}`, `Rescaled(e_i) = 0.5 * (max + min)`.
  * Note that since zero values will probably be transformed to non-zero values, output of the
  * transformer will be DenseVector even for sparse input.
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/linalg/SQLDataTypes.scala
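The MinMaxScaler formula quoted in the diff is easy to sanity-check by hand; a small pure-Python sketch (not from the commit):

```python
# Hand-rolled check of the MinMaxScaler rescaling formula from the docs above;
# a sketch for illustration, not Spark code.
def rescale(e, e_min, e_max, lo=0.0, hi=1.0):
    """Rescaled(e_i) = (e_i - E_min) / (E_max - E_min) * (hi - lo) + lo"""
    if e_max == e_min:
        # Degenerate case called out in the docs: Rescaled(e_i) = 0.5 * (hi + lo)
        return 0.5 * (hi + lo)
    return (e - e_min) / (e_max - e_min) * (hi - lo) + lo

feature = [1.0, 3.0, 5.0]
print([rescale(e, min(feature), max(feature)) for e in feature])  # [0.0, 0.5, 1.0]
```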
spark git commit: [SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 9e3a59858 -> 550d0e7dc

[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema

What changes were proposed in this pull request?

If we create a table pointing to a parquet/json dataset without specifying the schema, the describe table command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, describe table does show the schema of such a table.

~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~ For data source tables, we infer the schema before table creation. Thus, this PR sets the inferred schema as the table schema at table creation.

How was this patch tested?

Added test cases

Author: gatorsmile

Closes #14148 from gatorsmile/describeSchema.

(cherry picked from commit c5ec879828369ec1d21acd7f18a792306634ff74)
Signed-off-by: Yin Huai

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/550d0e7d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/550d0e7d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/550d0e7d

Branch: refs/heads/branch-2.0
Commit: 550d0e7dc6339fac0fe3bb5a8d6038681fd3fec3
Parents: 9e3a598
Author: gatorsmile
Authored: Wed Jul 13 15:23:37 2016 -0700
Committer: Yin Huai
Committed: Wed Jul 13 15:23:59 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 28 +---
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 16 ++-
 2 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/550d0e7d/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 1483604..b2300b4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)

+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }

     result
   }

-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }

   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)

     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }

   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table,
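A minimal PySpark sketch of the behavior this patch fixes (table name and path below are hypothetical):

```python
# Hypothetical demo of DESCRIBE on a data source table created without an
# explicit schema; the table name and path are made up.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Write a Parquet dataset, then point a table at it without declaring a schema.
spark.range(10).selectExpr("id", "id * 2 AS doubled") \
    .write.mode("overwrite").parquet("/tmp/describe_demo")
spark.sql("CREATE TABLE demo_tbl USING parquet OPTIONS (path '/tmp/describe_demo')")

# With this patch the inferred columns (id, doubled) are listed instead of
# "# Schema of this table is inferred at runtime".
spark.sql("DESCRIBE demo_tbl").show(truncate=False)
```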
spark git commit: [SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
Repository: spark
Updated Branches:
  refs/heads/master fb2e8eeb0 -> c5ec87982

[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema

What changes were proposed in this pull request?

If we create a table pointing to a parquet/json dataset without specifying the schema, the describe table command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, describe table does show the schema of such a table.

~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~ For data source tables, we infer the schema before table creation. Thus, this PR sets the inferred schema as the table schema at table creation.

How was this patch tested?

Added test cases

Author: gatorsmile

Closes #14148 from gatorsmile/describeSchema.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5ec8798
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5ec8798
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5ec8798

Branch: refs/heads/master
Commit: c5ec879828369ec1d21acd7f18a792306634ff74
Parents: fb2e8ee
Author: gatorsmile
Authored: Wed Jul 13 15:23:37 2016 -0700
Committer: Yin Huai
Committed: Wed Jul 13 15:23:37 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 28 +---
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 16 ++-
 2 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/c5ec8798/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 5c815df..6651c33 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)

+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }

     result
   }

-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }

   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)

     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }

   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)

     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", "", "")
spark git commit: [SPARKR][DOCS][MINOR] R programming guide to include csv data source example
Repository: spark
Updated Branches:
  refs/heads/master b4baf086c -> fb2e8eeb0

[SPARKR][DOCS][MINOR] R programming guide to include csv data source example

## What changes were proposed in this pull request?

Minor documentation update for code example, code style, and missed reference to "sparkR.init"

## How was this patch tested?

manual

shivaram

Author: Felix Cheung

Closes #14178 from felixcheung/rcsvprogrammingguide.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb2e8eeb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb2e8eeb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb2e8eeb

Branch: refs/heads/master
Commit: fb2e8eeb0b1e56bea535165f7a3bec6558b3f4a3
Parents: b4baf08
Author: Felix Cheung
Authored: Wed Jul 13 15:09:23 2016 -0700
Committer: Shivaram Venkataraman
Committed: Wed Jul 13 15:09:23 2016 -0700

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  2 +-
 docs/sparkr.md                            | 27 +-
 2 files changed, 19 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/fb2e8eeb/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index fdd6020..e61fa41 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -237,7 +237,7 @@ test_that("read csv as DataFrame", {
                    "Empty,Dummy,Placeholder")
   writeLines(mockLinesCsv, csvPath)

-  df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.string = "Empty")
+  df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "Empty")
   expect_equal(count(df2), 4)
   withoutna2 <- na.omit(df2, how = "any", cols = "year")
   expect_equal(count(withoutna2), 3)

http://git-wip-us.apache.org/repos/asf/spark/blob/fb2e8eeb/docs/sparkr.md
----------------------------------------------------------------------
diff --git a/docs/sparkr.md b/docs/sparkr.md
index b4acb23..9fda0ec 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -111,19 +111,17 @@ head(df)

 SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. This section describes the general methods for loading and saving data using Data Sources. You can check the Spark SQL programming guide for more [specific options](sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources.

 The general method for creating SparkDataFrames from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active SparkSession will be used automatically. SparkR supports reading JSON, CSV and Parquet files natively and through [Spark Packages](http://spark-packages.org/) you can find data source connectors for popular file formats like [Avro](http://spark-packages.org/package/databricks/spark-avro). These packages can either be added by
-specifying `--packages` with `spark-submit` or `sparkR` commands, or if creating context through `init`
-you can specify the packages with the `packages` argument.
+specifying `--packages` with `spark-submit` or `sparkR` commands, or if initializing SparkSession with `sparkPackages` parameter when in an interactive R shell or from RStudio.

 {% highlight r %}
-sc <- sparkR.session(sparkPackages="com.databricks:spark-avro_2.11:3.0.0")
+sc <- sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0")
 {% endhighlight %}

 We can see how to use data sources using an example JSON input file. Note that the file that is used here is _not_ a typical JSON file. Each line in the file must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail.

-
 {% highlight r %}
 people <- read.df("./examples/src/main/resources/people.json", "json")
 head(people)
@@ -138,6 +136,18 @@ printSchema(people)
 # |-- age: long (nullable = true)
 # |-- name: string (nullable = true)

+# Similarly, multiple files can be read with read.json
+people <- read.json(c("./examples/src/main/resources/people.json", "./examples/src/main/resources/people2.json"))
+
 {% endhighlight %}

+The data sources API natively supports CSV formatted input files. For more information please refer to SparkR [read.df](api/R/read.df.html) API documentation.
+
+{% highlight r %}
+df <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "NA")
+
+{% endhighlight %}

@@ -146,7 +156,7 @@ to a Parquet file using
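For readers following along in Python rather than R, a rough PySpark analogue of the CSV example above (the path is hypothetical):

```python
# Rough PySpark analogue of the SparkR read.df CSV example; the path is made up.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# header / inferSchema / nullValue correspond to the header, inferSchema and
# na.strings arguments in the R example.
df = spark.read.csv("./examples/src/main/resources/people.csv",
                    header=True, inferSchema=True, nullValue="NA")
df.printSchema()
```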
spark git commit: [SPARKR][MINOR] R examples and test updates
Repository: spark
Updated Branches:
  refs/heads/master 51a6706b1 -> b4baf086c

[SPARKR][MINOR] R examples and test updates

## What changes were proposed in this pull request?

Minor example updates

## How was this patch tested?

manual

shivaram

Author: Felix Cheung

Closes #14171 from felixcheung/rexample.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4baf086
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4baf086
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4baf086

Branch: refs/heads/master
Commit: b4baf086ca380a46d953f2710184ad9eee3a045e
Parents: 51a6706
Author: Felix Cheung
Authored: Wed Jul 13 13:33:34 2016 -0700
Committer: Shivaram Venkataraman
Committed: Wed Jul 13 13:33:34 2016 -0700

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/jarTest.R           | 2 +-
 R/pkg/inst/tests/testthat/packageInAJarTest.R | 2 +-
 examples/src/main/r/RSparkSQLExample.R        | 3 +++
 examples/src/main/r/dataframe.R               | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/R/pkg/inst/tests/testthat/jarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R
index 84e4845..51754a4 100644
--- a/R/pkg/inst/tests/testthat/jarTest.R
+++ b/R/pkg/inst/tests/testthat/jarTest.R
@@ -16,7 +16,7 @@
 #
 library(SparkR)

-sparkSession <- sparkR.session()
+sparkR.session()

 helloTest <- SparkR:::callJStatic("sparkR.test.hello",
                                   "helloWorld",

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/R/pkg/inst/tests/testthat/packageInAJarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R
index 940c91f..4bc935c 100644
--- a/R/pkg/inst/tests/testthat/packageInAJarTest.R
+++ b/R/pkg/inst/tests/testthat/packageInAJarTest.R
@@ -17,7 +17,7 @@
 library(SparkR)
 library(sparkPackageTest)

-sparkSession <- sparkR.session()
+sparkR.session()

 run1 <- myfunc(5L)

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/examples/src/main/r/RSparkSQLExample.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index eba3f1b..f20875c 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -195,3 +195,6 @@ results <- collect(sql("FROM src SELECT key, value"))
 # $example on:jdbc$
 df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password")
 # $example off:jdbc$
+
+# Stop the SparkSession now
+sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/examples/src/main/r/dataframe.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 295f9b4..82b85f2 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -18,7 +18,7 @@
 library(SparkR)

 # Initialize SparkSession
-sc <- sparkR.session(appName = "SparkR-DataFrame-example")
+sparkR.session(appName = "SparkR-DataFrame-example")

 # Create a simple local data.frame
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
spark git commit: [SPARKR][MINOR] R examples and test updates
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 86adc5cfb -> 18255a934

[SPARKR][MINOR] R examples and test updates

## What changes were proposed in this pull request?

Minor example updates

## How was this patch tested?

manual

shivaram

Author: Felix Cheung

Closes #14171 from felixcheung/rexample.

(cherry picked from commit b4baf086ca380a46d953f2710184ad9eee3a045e)
Signed-off-by: Shivaram Venkataraman

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/18255a93
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/18255a93
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/18255a93

Branch: refs/heads/branch-2.0
Commit: 18255a9345dd711bf630993c582511efa74b7919
Parents: 86adc5c
Author: Felix Cheung
Authored: Wed Jul 13 13:33:34 2016 -0700
Committer: Shivaram Venkataraman
Committed: Wed Jul 13 13:33:47 2016 -0700

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/jarTest.R           | 2 +-
 R/pkg/inst/tests/testthat/packageInAJarTest.R | 2 +-
 examples/src/main/r/RSparkSQLExample.R        | 3 +++
 examples/src/main/r/dataframe.R               | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/R/pkg/inst/tests/testthat/jarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R
index 84e4845..51754a4 100644
--- a/R/pkg/inst/tests/testthat/jarTest.R
+++ b/R/pkg/inst/tests/testthat/jarTest.R
@@ -16,7 +16,7 @@
 #
 library(SparkR)

-sparkSession <- sparkR.session()
+sparkR.session()

 helloTest <- SparkR:::callJStatic("sparkR.test.hello",
                                   "helloWorld",

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/R/pkg/inst/tests/testthat/packageInAJarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R
index 940c91f..4bc935c 100644
--- a/R/pkg/inst/tests/testthat/packageInAJarTest.R
+++ b/R/pkg/inst/tests/testthat/packageInAJarTest.R
@@ -17,7 +17,7 @@
 library(SparkR)
 library(sparkPackageTest)

-sparkSession <- sparkR.session()
+sparkR.session()

 run1 <- myfunc(5L)

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/examples/src/main/r/RSparkSQLExample.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index eba3f1b..f20875c 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -195,3 +195,6 @@ results <- collect(sql("FROM src SELECT key, value"))
 # $example on:jdbc$
 df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password")
 # $example off:jdbc$
+
+# Stop the SparkSession now
+sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/examples/src/main/r/dataframe.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 295f9b4..82b85f2 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -18,7 +18,7 @@
 library(SparkR)

 # Initialize SparkSession
-sc <- sparkR.session(appName = "SparkR-DataFrame-example")
+sparkR.session(appName = "SparkR-DataFrame-example")

 # Create a simple local data.frame
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
spark git commit: [SPARK-16114][SQL] updated structured streaming guide
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 7de183d97 -> 86adc5cfb

[SPARK-16114][SQL] updated structured streaming guide

## What changes were proposed in this pull request?

Updated structured streaming programming guide with new windowed example.

## How was this patch tested?

Docs

Author: James Thomas

Closes #14183 from jjthomas/ss_docs_update.

(cherry picked from commit 51a6706b1339bb761602e33276a469f71be2cd90)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/86adc5cf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/86adc5cf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/86adc5cf

Branch: refs/heads/branch-2.0
Commit: 86adc5cfbe286eb4d6071ec9ee09b6d0960a8509
Parents: 7de183d
Author: James Thomas
Authored: Wed Jul 13 13:26:23 2016 -0700
Committer: Tathagata Das
Committed: Wed Jul 13 13:26:34 2016 -0700

----------------------------------------------------------------------
 docs/structured-streaming-programming-guide.md | 49 ++---
 1 file changed, 23 insertions(+), 26 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/86adc5cf/docs/structured-streaming-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md
index 7949396..3ef39e4 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -626,52 +626,49 @@ The result tables would look something like the following.

 ![Window Operations](img/structured-streaming-window.png)

-Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations.
+Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations. You can see the full code for the below examples in
+[Scala]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala)/
+[Java]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java)/
+[Python]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py).

 {% highlight scala %}
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count()
+import spark.implicits._

-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal")
+val words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+val windowedCounts = words.groupBy(
+  window($"timestamp", "10 minutes", "5 minutes"),
+  $"word"
+).count()
 {% endhighlight %}

 {% highlight java %}
-import static org.apache.spark.sql.functions.window;
-
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count();
-
-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal");
+Dataset<Row> words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+Dataset<Row> windowedCounts = words.groupBy(
+  functions.window(words.col("timestamp"), "10 minutes", "5 minutes"),
+  words.col("word")
+).count();
 {% endhighlight %}

 {% highlight python %}
-from pyspark.sql.functions import window
-
-# Number of events in every 1 minute time windows
-df.groupBy(window("time", "1 minute")).count()
+words = ...  # streaming DataFrame of schema { timestamp: Timestamp, word: String }

-# Average number of events for each device type in every 1 minute time windows
-df.groupBy("type", window("time", "1 minute")).avg("signal")
+# Group the data by window and word and compute the count of each group
+windowedCounts = words.groupBy(
+    window(words.timestamp, '10 minutes', '5 minutes'),
+    words.word
+).count()
 {% endhighlight %}
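Pulling the fragments above together, a condensed runnable sketch of the windowed word count, along the lines of the linked structured_network_wordcount_windowed.py (host and port are placeholders):

```python
# Condensed windowed word count in the spirit of the example the guide links to;
# assumes a line source such as `nc -lk 9999` on localhost:9999.
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, split, window

spark = SparkSession.builder.appName("WindowedWordCount").getOrCreate()

# Socket source; includeTimestamp attaches an arrival timestamp to each line.
lines = (spark.readStream.format("socket")
         .option("host", "localhost").option("port", 9999)
         .option("includeTimestamp", "true").load())

# Split each line into words, keeping the line's timestamp.
words = lines.select(explode(split(lines.value, " ")).alias("word"),
                     lines.timestamp)

# Count words within 10-minute windows, sliding every 5 minutes.
windowedCounts = words.groupBy(
    window(words.timestamp, "10 minutes", "5 minutes"),
    words.word
).count()

query = (windowedCounts.writeStream.outputMode("complete")
         .format("console").option("truncate", "false").start())
query.awaitTermination()
```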
spark git commit: [SPARK-16114][SQL] updated structured streaming guide
Repository: spark
Updated Branches:
  refs/heads/master 0744d84c9 -> 51a6706b1

[SPARK-16114][SQL] updated structured streaming guide

## What changes were proposed in this pull request?

Updated structured streaming programming guide with new windowed example.

## How was this patch tested?

Docs

Author: James Thomas

Closes #14183 from jjthomas/ss_docs_update.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51a6706b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51a6706b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51a6706b

Branch: refs/heads/master
Commit: 51a6706b1339bb761602e33276a469f71be2cd90
Parents: 0744d84
Author: James Thomas
Authored: Wed Jul 13 13:26:23 2016 -0700
Committer: Tathagata Das
Committed: Wed Jul 13 13:26:23 2016 -0700

----------------------------------------------------------------------
 docs/structured-streaming-programming-guide.md | 49 ++---
 1 file changed, 23 insertions(+), 26 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/51a6706b/docs/structured-streaming-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md
index 7949396..3ef39e4 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -626,52 +626,49 @@ The result tables would look something like the following.

 ![Window Operations](img/structured-streaming-window.png)

-Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations.
+Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations. You can see the full code for the below examples in
+[Scala]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala)/
+[Java]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java)/
+[Python]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py).

 {% highlight scala %}
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count()
+import spark.implicits._

-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal")
+val words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+val windowedCounts = words.groupBy(
+  window($"timestamp", "10 minutes", "5 minutes"),
+  $"word"
+).count()
 {% endhighlight %}

 {% highlight java %}
-import static org.apache.spark.sql.functions.window;
-
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count();
-
-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal");
+Dataset<Row> words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+Dataset<Row> windowedCounts = words.groupBy(
+  functions.window(words.col("timestamp"), "10 minutes", "5 minutes"),
+  words.col("word")
+).count();
 {% endhighlight %}

 {% highlight python %}
-from pyspark.sql.functions import window
-
-# Number of events in every 1 minute time windows
-df.groupBy(window("time", "1 minute")).count()
+words = ...  # streaming DataFrame of schema { timestamp: Timestamp, word: String }

-# Average number of events for each device type in every 1 minute time windows
-df.groupBy("type", window("time", "1 minute")).avg("signal")
+# Group the data by window and word and compute the count of each group
+windowedCounts = words.groupBy(
+    window(words.timestamp, '10 minutes', '5 minutes'),
+    words.word
+).count()
 {% endhighlight %}
spark git commit: [SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2e97f3a08 -> 7de183d97

[SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite

## What changes were proposed in this pull request?

It's unnecessary. `QueryTest` already sets it.

Author: Burak Yavuz

Closes #14170 from brkyvz/test-tz.

(cherry picked from commit 0744d84c91d6e494dea77a35e6410bc4b1849e71)
Signed-off-by: Michael Armbrust

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7de183d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7de183d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7de183d9

Branch: refs/heads/branch-2.0
Commit: 7de183d975c1a46bde6a9020b339673d953dd1a1
Parents: 2e97f3a
Author: Burak Yavuz
Authored: Wed Jul 13 12:54:57 2016 -0700
Committer: Michael Armbrust
Committed: Wed Jul 13 12:55:11 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/DataFrameTimeWindowingSuite.scala | 10 --
 1 file changed, 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7de183d9/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
index a15b4e1..4296ec5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
@@ -29,16 +29,6 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSQLContext with B

   import testImplicits._

-  override def beforeEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
-  }
-
-  override def afterEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(null)
-  }
-
   test("tumbling window groupBy statement") {
     val df = Seq(
       ("2016-03-27 19:39:34", 1, "a"),
spark git commit: [SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite
Repository: spark
Updated Branches:
  refs/heads/master 01f09b161 -> 0744d84c9

[SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite

## What changes were proposed in this pull request?

It's unnecessary. `QueryTest` already sets it.

Author: Burak Yavuz

Closes #14170 from brkyvz/test-tz.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0744d84c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0744d84c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0744d84c

Branch: refs/heads/master
Commit: 0744d84c91d6e494dea77a35e6410bc4b1849e71
Parents: 01f09b1
Author: Burak Yavuz
Authored: Wed Jul 13 12:54:57 2016 -0700
Committer: Michael Armbrust
Committed: Wed Jul 13 12:54:57 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/DataFrameTimeWindowingSuite.scala | 10 --
 1 file changed, 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/0744d84c/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
index a15b4e1..4296ec5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
@@ -29,16 +29,6 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSQLContext with B

   import testImplicits._

-  override def beforeEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
-  }
-
-  override def afterEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(null)
-  }
-
   test("tumbling window groupBy statement") {
     val df = Seq(
       ("2016-03-27 19:39:34", 1, "a"),
[1/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
Repository: spark
Updated Branches: refs/heads/branch-2.0 90f0e8132 -> 2e97f3a08

http://git-wip-us.apache.org/repos/asf/spark/blob/2e97f3a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index 91edcf2..f1664ce 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -22,7 +22,7 @@ import java.util.Random
 import scala.annotation.tailrec
 import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
@@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
- * :: Experimental ::
- *
  * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
  * by Steinbach, Karypis, and Kumar, with modification to fit Spark.
  * The algorithm starts from a single cluster that contains all points.
@@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel
  * KDD Workshop on Text Mining, 2000.]]
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeans private (
     private var k: Int,
     private var maxIterations: Int,
@@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable {
 }
 
 /**
- * :: Experimental ::
- *
  * Represents a node in a clustering tree.
  *
  * @param index node index, negative for internal nodes and non-negative for leaf nodes
@@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable {
  * @param children children nodes
  */
 @Since("1.6.0")
-@Experimental
 private[clustering] class ClusteringTreeNode private[clustering] (
     val index: Int,
     val size: Long,

http://git-wip-us.apache.org/repos/asf/spark/blob/2e97f3a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 11fd940..8438015 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._
 import org.json4s.JsonDSL._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.Vector
@@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 
 /**
- * :: Experimental ::
- *
  * Clustering model produced by [[BisectingKMeans]].
  * The prediction is done level-by-level from the root node to a leaf node, and at each node among
  * its children the closest to the input point is selected.
@@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession}
  * @param root the root node of the clustering tree
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeansModel private[clustering] (
     private[clustering] val root: ClusteringTreeNode
   ) extends Serializable with Saveable with Logging {

http://git-wip-us.apache.org/repos/asf/spark/blob/2e97f3a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index d295826..9ebba1d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -426,13 +426,10 @@ class LocalLDAModel private[spark] (
 }
 
 /**
- * :: Experimental ::
- *
  * Local (non-distributed) model fitted by [[LDA]].
  *
  * This model stores the inferred topics only; it does not store info about the training dataset.
  */
[1/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
Repository: spark
Updated Branches: refs/heads/master d8220c1e5 -> 01f09b161

http://git-wip-us.apache.org/repos/asf/spark/blob/01f09b16/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index 91edcf2..f1664ce 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -22,7 +22,7 @@ import java.util.Random
 import scala.annotation.tailrec
 import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
@@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
- * :: Experimental ::
- *
  * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
  * by Steinbach, Karypis, and Kumar, with modification to fit Spark.
  * The algorithm starts from a single cluster that contains all points.
@@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel
  * KDD Workshop on Text Mining, 2000.]]
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeans private (
     private var k: Int,
     private var maxIterations: Int,
@@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable {
 }
 
 /**
- * :: Experimental ::
- *
  * Represents a node in a clustering tree.
  *
  * @param index node index, negative for internal nodes and non-negative for leaf nodes
@@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable {
  * @param children children nodes
  */
 @Since("1.6.0")
-@Experimental
 private[clustering] class ClusteringTreeNode private[clustering] (
     val index: Int,
     val size: Long,

http://git-wip-us.apache.org/repos/asf/spark/blob/01f09b16/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 11fd940..8438015 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._
 import org.json4s.JsonDSL._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.Vector
@@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 
 /**
- * :: Experimental ::
- *
  * Clustering model produced by [[BisectingKMeans]].
  * The prediction is done level-by-level from the root node to a leaf node, and at each node among
  * its children the closest to the input point is selected.
@@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession}
  * @param root the root node of the clustering tree
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeansModel private[clustering] (
     private[clustering] val root: ClusteringTreeNode
   ) extends Serializable with Saveable with Logging {

http://git-wip-us.apache.org/repos/asf/spark/blob/01f09b16/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index d295826..9ebba1d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -426,13 +426,10 @@ class LocalLDAModel private[spark] (
 }
 
 /**
- * :: Experimental ::
- *
  * Local (non-distributed) model fitted by [[LDA]].
  *
  * This model stores the inferred topics only; it does not store info about the training dataset.
  */
-@Experimental
[2/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
[SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML

## What changes were proposed in this pull request?

General decisions to follow, except where noted:

* spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:

* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:

* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:

* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify.

Author: Joseph K. Bradley

Closes #14147 from jkbradley/experimental-audit.

(cherry picked from commit 01f09b161217193b797c8c85969d17054c958615)
Signed-off-by: Joseph K. Bradley

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e97f3a0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e97f3a0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e97f3a0
Branch: refs/heads/branch-2.0
Commit: 2e97f3a08e3b48ce8ad0d669ef844210d0a3d2be
Parents: 90f0e81
Author: Joseph K. Bradley
Authored: Wed Jul 13 12:33:39 2016 -0700
Committer: Joseph K. Bradley
Committed: Wed Jul 13 12:34:15 2016 -0700

--
.../scala/org/apache/spark/ml/Pipeline.scala | 6 +-
.../classification/DecisionTreeClassifier.scala | 7 +--
.../spark/ml/classification/GBTClassifier.scala | 7 +--
.../ml/classification/LogisticRegression.scala | 4 --
.../spark/ml/classification/NaiveBayes.scala | 6 +-
.../spark/ml/classification/OneVsRest.scala | 7 +--
.../classification/RandomForestClassifier.scala | 7 +--
.../org/apache/spark/ml/feature/Binarizer.scala | 4 +-
.../apache/spark/ml/feature/Bucketizer.scala | 4 +-
.../apache/spark/ml/feature/ChiSqSelector.scala | 6 +-
.../spark/ml/feature/CountVectorizer.scala | 6 +-
.../scala/org/apache/spark/ml/feature/DCT.scala | 4 +-
.../spark/ml/feature/ElementwiseProduct.scala | 4 +-
.../org/apache/spark/ml/feature/HashingTF.scala | 4 +-
.../scala/org/apache/spark/ml/feature/IDF.scala | 6 +-
.../apache/spark/ml/feature/Interaction.scala | 4 +-
.../apache/spark/ml/feature/LabeledPoint.scala | 2 +
.../apache/spark/ml/feature/MinMaxScaler.scala | 6 +-
.../org/apache/spark/ml/feature/NGram.scala | 4 +-
.../apache/spark/ml/feature/Normalizer.scala | 4 +-
.../apache/spark/ml/feature/OneHotEncoder.scala | 4 +-
.../scala/org/apache/spark/ml/feature/PCA.scala | 7 +--
.../spark/ml/feature/PolynomialExpansion.scala | 4 +-
.../spark/ml/feature/QuantileDiscretizer.scala | 4 +-
.../spark/ml/feature/SQLTransformer.scala | 4 +-
.../spark/ml/feature/StandardScaler.scala | 6 +-
.../spark/ml/feature/StopWordsRemover.scala | 4 +-
.../apache/spark/ml/feature/StringIndexer.scala | 8 +--
.../org/apache/spark/ml/feature/Tokenizer.scala | 6 +-
.../spark/ml/feature/VectorAssembler.scala | 4 +-
.../apache/spark/ml/feature/VectorIndexer.scala | 6 +-
.../apache/spark/ml/feature/VectorSlicer.scala | 4 +-
.../org/apache/spark/ml/feature/Word2Vec.scala | 7 +--
.../org/apache/spark/ml/param/params.scala | 9 +--
.../apache/spark/ml/recommendation/ALS.scala | 8 +--
.../ml/regression/DecisionTreeRegressor.scala | 7 +--
.../spark/ml/regression/GBTRegressor.scala | 6 --
.../ml/regression/IsotonicRegression.scala | 6 +-
.../spark/ml/regression/LinearRegression.scala | 4 --
.../ml/regression/RandomForestRegressor.scala | 7 +--
.../scala/org/apache/spark/ml/tree/Node.scala | 10 +--
.../scala/org/apache/spark/ml/tree/Split.scala | 8 +--
.../apache/spark/ml/tuning/CrossValidator.scala | 6 +-
.../spark/ml/tuning/ParamGridBuilder.scala | 4 +-
[2/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
[SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML

## What changes were proposed in this pull request?

General decisions to follow, except where noted:

* spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:

* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:

* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:

* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify.

Author: Joseph K. Bradley

Closes #14147 from jkbradley/experimental-audit.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01f09b16
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01f09b16
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01f09b16
Branch: refs/heads/master
Commit: 01f09b161217193b797c8c85969d17054c958615
Parents: d8220c1
Author: Joseph K. Bradley
Authored: Wed Jul 13 12:33:39 2016 -0700
Committer: Joseph K. Bradley
Committed: Wed Jul 13 12:33:39 2016 -0700

--
.../scala/org/apache/spark/ml/Pipeline.scala | 6 +-
.../classification/DecisionTreeClassifier.scala | 7 +--
.../spark/ml/classification/GBTClassifier.scala | 7 +--
.../ml/classification/LogisticRegression.scala | 4 --
.../spark/ml/classification/NaiveBayes.scala | 6 +-
.../spark/ml/classification/OneVsRest.scala | 7 +--
.../classification/RandomForestClassifier.scala | 7 +--
.../org/apache/spark/ml/feature/Binarizer.scala | 4 +-
.../apache/spark/ml/feature/Bucketizer.scala | 4 +-
.../apache/spark/ml/feature/ChiSqSelector.scala | 6 +-
.../spark/ml/feature/CountVectorizer.scala | 6 +-
.../scala/org/apache/spark/ml/feature/DCT.scala | 4 +-
.../spark/ml/feature/ElementwiseProduct.scala | 4 +-
.../org/apache/spark/ml/feature/HashingTF.scala | 4 +-
.../scala/org/apache/spark/ml/feature/IDF.scala | 6 +-
.../apache/spark/ml/feature/Interaction.scala | 4 +-
.../apache/spark/ml/feature/LabeledPoint.scala | 2 +
.../apache/spark/ml/feature/MinMaxScaler.scala | 6 +-
.../org/apache/spark/ml/feature/NGram.scala | 4 +-
.../apache/spark/ml/feature/Normalizer.scala | 4 +-
.../apache/spark/ml/feature/OneHotEncoder.scala | 4 +-
.../scala/org/apache/spark/ml/feature/PCA.scala | 7 +--
.../spark/ml/feature/PolynomialExpansion.scala | 4 +-
.../spark/ml/feature/QuantileDiscretizer.scala | 4 +-
.../spark/ml/feature/SQLTransformer.scala | 4 +-
.../spark/ml/feature/StandardScaler.scala | 6 +-
.../spark/ml/feature/StopWordsRemover.scala | 4 +-
.../apache/spark/ml/feature/StringIndexer.scala | 8 +--
.../org/apache/spark/ml/feature/Tokenizer.scala | 6 +-
.../spark/ml/feature/VectorAssembler.scala | 4 +-
.../apache/spark/ml/feature/VectorIndexer.scala | 6 +-
.../apache/spark/ml/feature/VectorSlicer.scala | 4 +-
.../org/apache/spark/ml/feature/Word2Vec.scala | 7 +--
.../org/apache/spark/ml/param/params.scala | 9 +--
.../apache/spark/ml/recommendation/ALS.scala | 8 +--
.../ml/regression/DecisionTreeRegressor.scala | 7 +--
.../spark/ml/regression/GBTRegressor.scala | 6 --
.../ml/regression/IsotonicRegression.scala | 6 +-
.../spark/ml/regression/LinearRegression.scala | 4 --
.../ml/regression/RandomForestRegressor.scala | 7 +--
.../scala/org/apache/spark/ml/tree/Node.scala | 10 +--
.../scala/org/apache/spark/ml/tree/Split.scala | 8 +--
.../apache/spark/ml/tuning/CrossValidator.scala | 6 +-
.../spark/ml/tuning/ParamGridBuilder.scala | 4 +-
.../spark/ml/tuning/TrainValidationSplit.scala | 6 +-
.../mllib/clustering/BisectingKMeans.scala | 8 +---
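The audit itself is mechanical, as the hunks above show: a class graduates from experimental to stable by dropping `@Experimental` while keeping `@Since`. A self-contained sketch of that before/after shape, using stand-in annotation classes rather than the real `org.apache.spark.annotation` sources:

```scala
import scala.annotation.StaticAnnotation

// Stand-ins for org.apache.spark.annotation.Since / Experimental, so this
// sketch compiles on its own; the real ones also drive API doc generation.
class Since(version: String) extends StaticAnnotation
class Experimental extends StaticAnnotation

// Before the audit (as in the diff hunks above):
//   @Since("1.6.0")
//   @Experimental
//   class BisectingKMeans private (...) { ... }

// After the audit, only the version tag remains; the API is now stable:
@Since("1.6.0")
class BisectingKMeans(val k: Int, val maxIterations: Int)
```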
spark git commit: [SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors
Repository: spark
Updated Branches: refs/heads/branch-2.0 7d9bd951b -> 90f0e8132

[SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors

## What changes were proposed in this pull request?

Currently, if `spark.dynamicAllocation.initialExecutors` is less than `spark.dynamicAllocation.minExecutors`, Spark silently picks the minExecutors value without any warning, whereas Spark 1.6 threw an exception for such a configuration. This change adds a warning log when these parameters are configured inconsistently.

## How was this patch tested?

Unit test added to verify the scenario.

Author: jerryshao

Closes #14149 from jerryshao/SPARK-16435.

(cherry picked from commit d8220c1e5e94abbdb9643672b918f0d748206db9)
Signed-off-by: Tom Graves

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90f0e813
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90f0e813
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90f0e813
Branch: refs/heads/branch-2.0
Commit: 90f0e8132bb40158d6d1b6be77e6b512d837466b
Parents: 7d9bd95
Author: jerryshao
Authored: Wed Jul 13 13:24:47 2016 -0500
Committer: Tom Graves
Committed: Wed Jul 13 13:25:05 2016 -0500

--
.../main/scala/org/apache/spark/util/Utils.scala | 19 ++-
.../scala/org/apache/spark/util/UtilsSuite.scala | 3 +++
2 files changed, 21 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/90f0e813/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 156cf17..a79d195 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2338,10 +2338,27 @@ private[spark] object Utils extends Logging {
    * Return the initial number of executors for dynamic allocation.
    */
   def getDynamicAllocationInitialExecutors(conf: SparkConf): Int = {
-    Seq(
+    if (conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    if (conf.get(EXECUTOR_INSTANCES).getOrElse(0) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${EXECUTOR_INSTANCES.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    val initialExecutors = Seq(
       conf.get(DYN_ALLOCATION_MIN_EXECUTORS),
       conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS),
       conf.get(EXECUTOR_INSTANCES).getOrElse(0)).max
+
+    logInfo(s"Using initial executors = $initialExecutors, max of " +
+      s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key}, ${DYN_ALLOCATION_MIN_EXECUTORS.key} and " +
+      s"${EXECUTOR_INSTANCES.key}")
+    initialExecutors
   }
 
   def tryWithResource[R <: Closeable, T](createResource: => R)(f: R => T): T = {

http://git-wip-us.apache.org/repos/asf/spark/blob/90f0e813/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index f5d0fb0..30952a9 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -782,6 +782,9 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
       conf.set("spark.dynamicAllocation.initialExecutors", "3")) === 4)
     assert(Utils.getDynamicAllocationInitialExecutors( // should use initialExecutors
       conf.set("spark.dynamicAllocation.initialExecutors", "5")) === 5)
+    assert(Utils.getDynamicAllocationInitialExecutors( // should use minExecutors
+      conf.set("spark.dynamicAllocation.initialExecutors", "2")
+        .set("spark.executor.instances", "1")) === 3)
   }
spark git commit: [SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors
Repository: spark
Updated Branches: refs/heads/master f376c3726 -> d8220c1e5

[SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors

## What changes were proposed in this pull request?

Currently, if `spark.dynamicAllocation.initialExecutors` is less than `spark.dynamicAllocation.minExecutors`, Spark silently picks the minExecutors value without any warning, whereas Spark 1.6 threw an exception for such a configuration. This change adds a warning log when these parameters are configured inconsistently.

## How was this patch tested?

Unit test added to verify the scenario.

Author: jerryshao

Closes #14149 from jerryshao/SPARK-16435.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d8220c1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d8220c1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d8220c1e
Branch: refs/heads/master
Commit: d8220c1e5e94abbdb9643672b918f0d748206db9
Parents: f376c37
Author: jerryshao
Authored: Wed Jul 13 13:24:47 2016 -0500
Committer: Tom Graves
Committed: Wed Jul 13 13:24:47 2016 -0500

--
.../main/scala/org/apache/spark/util/Utils.scala | 19 ++-
.../scala/org/apache/spark/util/UtilsSuite.scala | 3 +++
2 files changed, 21 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d8220c1e/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 298e624..2e4ec4c 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2342,10 +2342,27 @@ private[spark] object Utils extends Logging {
    * Return the initial number of executors for dynamic allocation.
    */
   def getDynamicAllocationInitialExecutors(conf: SparkConf): Int = {
-    Seq(
+    if (conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    if (conf.get(EXECUTOR_INSTANCES).getOrElse(0) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${EXECUTOR_INSTANCES.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    val initialExecutors = Seq(
       conf.get(DYN_ALLOCATION_MIN_EXECUTORS),
       conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS),
       conf.get(EXECUTOR_INSTANCES).getOrElse(0)).max
+
+    logInfo(s"Using initial executors = $initialExecutors, max of " +
+      s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key}, ${DYN_ALLOCATION_MIN_EXECUTORS.key} and " +
+      s"${EXECUTOR_INSTANCES.key}")
+    initialExecutors
   }
 
   def tryWithResource[R <: Closeable, T](createResource: => R)(f: R => T): T = {

http://git-wip-us.apache.org/repos/asf/spark/blob/d8220c1e/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index f5d0fb0..30952a9 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -782,6 +782,9 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
       conf.set("spark.dynamicAllocation.initialExecutors", "3")) === 4)
     assert(Utils.getDynamicAllocationInitialExecutors( // should use initialExecutors
       conf.set("spark.dynamicAllocation.initialExecutors", "5")) === 5)
+    assert(Utils.getDynamicAllocationInitialExecutors( // should use minExecutors
+      conf.set("spark.dynamicAllocation.initialExecutors", "2")
+        .set("spark.executor.instances", "1")) === 3)
   }
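The rule the patch implements is simply "take the max of the three settings, but warn when the max will silently override an explicit value". A standalone sketch of that rule on plain integers follows; the function name and `println`-based warnings are illustrative, not Spark's actual `Utils` code, which reads typed config entries and logs through its logging framework:

```scala
// Standalone sketch of the decision rule added in this patch (simplified).
def dynamicAllocationInitialExecutors(
    minExecutors: Int,
    initialExecutors: Int,
    executorInstances: Int): Int = {
  if (initialExecutors < minExecutors) {
    println(s"Warning: spark.dynamicAllocation.initialExecutors ($initialExecutors) " +
      s"is less than spark.dynamicAllocation.minExecutors ($minExecutors); ignoring it.")
  }
  if (executorInstances < minExecutors) {
    println(s"Warning: spark.executor.instances ($executorInstances) " +
      s"is less than spark.dynamicAllocation.minExecutors ($minExecutors); ignoring it.")
  }
  // The effective value is simply the max of the three settings.
  Seq(minExecutors, initialExecutors, executorInstances).max
}

// Mirrors the new unit test case: min=3, initial=2, instances=1 resolves to 3.
assert(dynamicAllocationInitialExecutors(3, 2, 1) == 3)
```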
spark git commit: [SPARK-16343][SQL] Improve the PushDownPredicate rule to pushdown predicates correctly in non-deterministic condition.
Repository: spark
Updated Branches: refs/heads/master ea06e4ef3 -> f376c3726

[SPARK-16343][SQL] Improve the PushDownPredicate rule to pushdown predicates correctly in non-deterministic condition.

## What changes were proposed in this pull request?

Currently our Optimizer may reorder predicates to run them more efficiently, but when a condition contains non-deterministic expressions, changing the order between the deterministic and non-deterministic parts may change the number of input rows. For example:

```SELECT a FROM t WHERE rand() < 0.1 AND a = 1```

and

```SELECT a FROM t WHERE a = 1 AND rand() < 0.1```

may call rand() a different number of times and therefore produce different output rows.

This PR improves the rule by only pushing down predicates that are placed before any non-deterministic predicates.

## How was this patch tested?

Expanded related testcases in FilterPushdownSuite.

Author: 蒋星博

Closes #14012 from jiangxb1987/ppd.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f376c372
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f376c372
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f376c372
Branch: refs/heads/master
Commit: f376c37268848dbb4b2fb57677e22ef2bf207b49
Parents: ea06e4e
Author: 蒋星博
Authored: Thu Jul 14 00:21:27 2016 +0800
Committer: Cheng Lian
Committed: Thu Jul 14 00:21:27 2016 +0800

--
.../sql/catalyst/optimizer/Optimizer.scala | 44 +---
.../optimizer/FilterPushdownSuite.scala | 8 ++--
2 files changed, 33 insertions(+), 19 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f376c372/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 368e9a5..08fb019 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1128,19 +1128,23 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper {
       project.copy(child = Filter(replaceAlias(condition, aliasMap), grandChild))
 
     // Push [[Filter]] operators through [[Window]] operators. Parts of the predicate that can be
-    // pushed beneath must satisfy the following two conditions:
+    // pushed beneath must satisfy the following conditions:
     // 1. All the expressions are part of window partitioning key. The expressions can be compound.
-    // 2. Deterministic
+    // 2. Deterministic.
+    // 3. Placed before any non-deterministic predicates.
     case filter @ Filter(condition, w: Window)
         if w.partitionSpec.forall(_.isInstanceOf[AttributeReference]) =>
       val partitionAttrs = AttributeSet(w.partitionSpec.flatMap(_.references))
-      val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition { cond =>
-        cond.references.subsetOf(partitionAttrs) && cond.deterministic &&
-          // This is for ensuring all the partitioning expressions have been converted to alias
-          // in Analyzer. Thus, we do not need to check if the expressions in conditions are
-          // the same as the expressions used in partitioning columns.
-          partitionAttrs.forall(_.isInstanceOf[Attribute])
+
+      val (candidates, containingNonDeterministic) =
+        splitConjunctivePredicates(condition).span(_.deterministic)
+
+      val (pushDown, rest) = candidates.partition { cond =>
+        cond.references.subsetOf(partitionAttrs)
       }
+
+      val stayUp = rest ++ containingNonDeterministic
+
       if (pushDown.nonEmpty) {
         val pushDownPredicate = pushDown.reduce(And)
         val newWindow = w.copy(child = Filter(pushDownPredicate, w.child))
@@ -1159,11 +1163,16 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper {
 
       // For each filter, expand the alias and check if the filter can be evaluated using
       // attributes produced by the aggregate operator's child operator.
-      val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition { cond =>
+      val (candidates, containingNonDeterministic) =
+        splitConjunctivePredicates(condition).span(_.deterministic)
+
+      val (pushDown, rest) = candidates.partition { cond =>
         val replaced = replaceAlias(cond, aliasMap)
-        replaced.references.subsetOf(aggregate.child.outputSet) && replaced.deterministic
+        replaced.references.subsetOf(aggregate.child.outputSet)
       }
+
+      val stayUp =
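The heart of the fix is swapping `partition` for `span(_.deterministic)`: only the prefix of conjuncts before the first non-deterministic predicate may be pushed down, because reordering across `rand()` changes how many rows it evaluates. A small self-contained sketch of that splitting behavior, with a toy `Pred` case class standing in for Catalyst expressions:

```scala
// Simplified model of a conjunct, standing in for a Catalyst Expression.
case class Pred(sql: String, deterministic: Boolean)

val conjuncts = Seq(
  Pred("a = 1", deterministic = true),
  Pred("rand() < 0.1", deterministic = false),
  Pred("b = 2", deterministic = true))

// span preserves order: everything up to the first non-deterministic
// predicate is a pushdown candidate; the rest must stay above the filter,
// even deterministic predicates like "b = 2" that follow rand().
val (candidates, containingNonDeterministic) = conjuncts.span(_.deterministic)

assert(candidates.map(_.sql) == Seq("a = 1"))
assert(containingNonDeterministic.map(_.sql) == Seq("rand() < 0.1", "b = 2"))
```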
spark git commit: [SPARK-16469] enhanced simulate multiply
Repository: spark
Updated Branches: refs/heads/branch-2.0 5a71a0501 -> 7d9bd951b

[SPARK-16469] enhanced simulate multiply

## What changes were proposed in this pull request?

We have a use case of multiplying very big sparse matrices: distributed block matrices of about 1000x1000 blocks, where the simulate-multiply step behaves like O(n^4) (n being 1000) and takes about 1.5 hours. We modified it slightly with a classical hashmap, and it now runs in about 30 seconds, O(n^2).

## How was this patch tested?

We have added a performance test and verified the reduced time.

Author: oraviv

Closes #14068 from uzadude/master.

(cherry picked from commit ea06e4ef34c860219a9aeec81816ef53ada96253)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d9bd951
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d9bd951
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d9bd951
Branch: refs/heads/branch-2.0
Commit: 7d9bd951b0b5767ef2c95eb7467f35c9409e7d8c
Parents: 5a71a05
Author: oraviv
Authored: Wed Jul 13 14:47:08 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 14:47:47 2016 +0100

--
.../spark/mllib/linalg/distributed/BlockMatrix.scala | 13 +
1 file changed, 9 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7d9bd951/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 639295c..9782350 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -426,16 +426,21 @@ class BlockMatrix @Since("1.3.0") (
       partitioner: GridPartitioner): (BlockDestinations, BlockDestinations) = {
     val leftMatrix = blockInfo.keys.collect() // blockInfo should already be cached
     val rightMatrix = other.blocks.keys.collect()
+
+    val rightCounterpartsHelper = rightMatrix.groupBy(_._1).mapValues(_.map(_._2))
     val leftDestinations = leftMatrix.map { case (rowIndex, colIndex) =>
-      val rightCounterparts = rightMatrix.filter(_._1 == colIndex)
-      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b._2)))
+      val rightCounterparts = rightCounterpartsHelper.getOrElse(colIndex, Array())
+      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
+    val leftCounterpartsHelper = leftMatrix.groupBy(_._2).mapValues(_.map(_._1))
     val rightDestinations = rightMatrix.map { case (rowIndex, colIndex) =>
-      val leftCounterparts = leftMatrix.filter(_._2 == rowIndex)
-      val partitions = leftCounterparts.map(b => partitioner.getPartition((b._1, colIndex)))
+      val leftCounterparts = leftCounterpartsHelper.getOrElse(rowIndex, Array())
+      val partitions = leftCounterparts.map(b => partitioner.getPartition((b, colIndex)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
     (leftDestinations, rightDestinations)
   }
spark git commit: [SPARK-16469] enhanced simulate multiply
Repository: spark
Updated Branches: refs/heads/master 51ade51a9 -> ea06e4ef3

[SPARK-16469] enhanced simulate multiply

## What changes were proposed in this pull request?

We have a use case of multiplying very big sparse matrices: distributed block matrices of about 1000x1000 blocks, where the simulate-multiply step behaves like O(n^4) (n being 1000) and takes about 1.5 hours. We modified it slightly with a classical hashmap, and it now runs in about 30 seconds, O(n^2).

## How was this patch tested?

We have added a performance test and verified the reduced time.

Author: oraviv

Closes #14068 from uzadude/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea06e4ef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea06e4ef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea06e4ef
Branch: refs/heads/master
Commit: ea06e4ef34c860219a9aeec81816ef53ada96253
Parents: 51ade51
Author: oraviv
Authored: Wed Jul 13 14:47:08 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 14:47:08 2016 +0100

--
.../spark/mllib/linalg/distributed/BlockMatrix.scala | 13 +
1 file changed, 9 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ea06e4ef/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 639295c..9782350 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -426,16 +426,21 @@ class BlockMatrix @Since("1.3.0") (
       partitioner: GridPartitioner): (BlockDestinations, BlockDestinations) = {
     val leftMatrix = blockInfo.keys.collect() // blockInfo should already be cached
     val rightMatrix = other.blocks.keys.collect()
+
+    val rightCounterpartsHelper = rightMatrix.groupBy(_._1).mapValues(_.map(_._2))
     val leftDestinations = leftMatrix.map { case (rowIndex, colIndex) =>
-      val rightCounterparts = rightMatrix.filter(_._1 == colIndex)
-      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b._2)))
+      val rightCounterparts = rightCounterpartsHelper.getOrElse(colIndex, Array())
+      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
+    val leftCounterpartsHelper = leftMatrix.groupBy(_._2).mapValues(_.map(_._1))
     val rightDestinations = rightMatrix.map { case (rowIndex, colIndex) =>
-      val leftCounterparts = leftMatrix.filter(_._2 == rowIndex)
-      val partitions = leftCounterparts.map(b => partitioner.getPartition((b._1, colIndex)))
+      val leftCounterparts = leftCounterpartsHelper.getOrElse(rowIndex, Array())
+      val partitions = leftCounterparts.map(b => partitioner.getPartition((b, colIndex)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
     (leftDestinations, rightDestinations)
   }
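The optimization is a classic index-before-loop rewrite: instead of scanning all right-matrix blocks once per left-matrix block (O(n^4) pairs for an n x n grid of blocks), the block coordinates are grouped into a hash map once, and each block then does an O(1) lookup. A minimal sketch on plain `(rowIndex, colIndex)` pairs, independent of the actual `BlockMatrix` types:

```scala
// Toy 3x3 grids of block coordinates, standing in for the collected keys.
val leftBlocks  = for (r <- 0 until 3; c <- 0 until 3) yield (r, c)
val rightBlocks = for (r <- 0 until 3; c <- 0 until 3) yield (r, c)

// One-time index: right-matrix column indices keyed by row index.
// This replaces the repeated rightBlocks.filter(_._1 == colIndex) scan.
val rightByRow: Map[Int, Seq[Int]] =
  rightBlocks.groupBy(_._1).map { case (row, blocks) => row -> blocks.map(_._2) }

val counterpartsPerBlock = leftBlocks.map { case (rowIndex, colIndex) =>
  // Hash lookup instead of a linear scan over every right block.
  val counterparts = rightByRow.getOrElse(colIndex, Seq.empty)
  ((rowIndex, colIndex), counterparts.map(c => (rowIndex, c)).toSet)
}.toMap

// Each left block (r, c) multiplies against right blocks in row c.
assert(counterpartsPerBlock((0, 1)) == Set((0, 0), (0, 1), (0, 2)))
```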
spark git commit: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs
Repository: spark
Updated Branches: refs/heads/branch-1.6 fb0933681 -> 4381e2121

[SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs

## What changes were proposed in this pull request?

Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup

## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #14153 from srowen/SPARK-16440.

(cherry picked from commit 51ade51a9fd64fc2fe651c505a286e6f29f59d40)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4381e212
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4381e212
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4381e212
Branch: refs/heads/branch-1.6
Commit: 4381e212140102b4bce756146c09e866c7b2d85c
Parents: fb09336
Author: Sean Owen
Authored: Wed Jul 13 11:39:32 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:39:49 2016 +0100

--
.../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4381e212/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 30a1849..c2ed896 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -416,6 +416,9 @@ class Word2Vec extends Serializable with Logging {
       }
     }
     newSentences.unpersist()
+    expTable.unpersist()
+    bcVocab.unpersist()
+    bcVocabHash.unpersist()
 
     val wordArray = vocab.map(_.word)
     new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)
spark git commit: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs
Repository: spark
Updated Branches: refs/heads/master 3d6f679cf -> 51ade51a9

[SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs

## What changes were proposed in this pull request?

Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup

## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #14153 from srowen/SPARK-16440.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51ade51a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51ade51a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51ade51a
Branch: refs/heads/master
Commit: 51ade51a9fd64fc2fe651c505a286e6f29f59d40
Parents: 3d6f679
Author: Sean Owen
Authored: Wed Jul 13 11:39:32 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:39:32 2016 +0100

--
.../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/51ade51a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index f2211df..6b9c8ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -434,6 +434,9 @@ class Word2Vec extends Serializable with Logging {
       bcSyn1Global.unpersist(false)
     }
     newSentences.unpersist()
+    expTable.unpersist()
+    bcVocab.unpersist()
+    bcVocabHash.unpersist()
 
     val wordArray = vocab.map(_.word)
     new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)
spark git commit: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs
Repository: spark
Updated Branches: refs/heads/branch-2.0 74ad486dc -> 5a71a0501

[SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs

## What changes were proposed in this pull request?

Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup

## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #14153 from srowen/SPARK-16440.

(cherry picked from commit 51ade51a9fd64fc2fe651c505a286e6f29f59d40)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a71a050
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a71a050
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a71a050
Branch: refs/heads/branch-2.0
Commit: 5a71a05015ac7aabfb6c4aa8753abc87ead20718
Parents: 74ad486
Author: Sean Owen
Authored: Wed Jul 13 11:39:32 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:39:39 2016 +0100

--
.../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5a71a050/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index f2211df..6b9c8ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -434,6 +434,9 @@ class Word2Vec extends Serializable with Logging {
       bcSyn1Global.unpersist(false)
     }
     newSentences.unpersist()
+    expTable.unpersist()
+    bcVocab.unpersist()
+    bcVocabHash.unpersist()
 
     val wordArray = vocab.map(_.word)
     new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)
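The leak pattern being fixed is worth spelling out: broadcast variables pin blocks on the driver and executors until `unpersist()` is called or the context shuts down, so a long-lived application that repeatedly calls `fit()` accumulates them. A minimal sketch of the lifecycle, assuming Spark is on the classpath; the local master and toy data are demo values only:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object BroadcastLifecycle {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("bc-demo"))

    // Broadcast once; every task reads vocab.value without reshipping it.
    val vocab = sc.broadcast(Map("spark" -> 0, "word2vec" -> 1))

    val counts = sc.parallelize(Seq("spark", "word2vec", "spark"))
      .map(w => vocab.value(w))
      .countByValue()
    println(counts)

    // Without this call, the broadcast blocks stay cached for the lifetime
    // of the context, which is the OoM reported in SPARK-16440 for long runs.
    vocab.unpersist()
    sc.stop()
  }
}
```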
spark git commit: [MINOR][YARN] Fix code error in yarn-cluster unit test
Repository: spark
Updated Branches: refs/heads/branch-2.0 a34a54435 -> 74ad486dc

[MINOR][YARN] Fix code error in yarn-cluster unit test

## What changes were proposed in this pull request?

Fix code error in yarn-cluster unit test.

## How was this patch tested?

Use existing tests.

Author: sharkd

Closes #14166 from sharkdtu/master.

(cherry picked from commit 3d6f679cfe5945a9f72841727342af39e9410e0a)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/74ad486d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/74ad486d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/74ad486d
Branch: refs/heads/branch-2.0
Commit: 74ad486dc886d8899c79eae8a78622eff05aeab6
Parents: a34a544
Author: sharkd
Authored: Wed Jul 13 11:36:02 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:36:09 2016 +0100

--
.../test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/74ad486d/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
--
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 874e304..1ccd7e5 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -97,7 +97,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
   }
 
   test("run Spark in yarn-cluster mode with different configurations") {
-    testBasicYarnApp(true,
+    testBasicYarnApp(false,
       Map(
         "spark.driver.memory" -> "512m",
         "spark.driver.cores" -> "1",
spark git commit: [MINOR][YARN] Fix code error in yarn-cluster unit test
Repository: spark
Updated Branches: refs/heads/master bf107f1e6 -> 3d6f679cf

[MINOR][YARN] Fix code error in yarn-cluster unit test

## What changes were proposed in this pull request?

Fix code error in yarn-cluster unit test.

## How was this patch tested?

Use existing tests.

Author: sharkd

Closes #14166 from sharkdtu/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d6f679c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d6f679c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d6f679c
Branch: refs/heads/master
Commit: 3d6f679cfe5945a9f72841727342af39e9410e0a
Parents: bf107f1
Author: sharkd
Authored: Wed Jul 13 11:36:02 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:36:02 2016 +0100

--
.../test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3d6f679c/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
--
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 874e304..1ccd7e5 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -97,7 +97,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
   }
 
   test("run Spark in yarn-cluster mode with different configurations") {
-    testBasicYarnApp(true,
+    testBasicYarnApp(false,
       Map(
         "spark.driver.memory" -> "512m",
         "spark.driver.cores" -> "1",
spark git commit: [SPARK-16438] Add Asynchronous Actions documentation
Repository: spark
Updated Branches: refs/heads/branch-2.0 38787ec93 -> a34a54435

[SPARK-16438] Add Asynchronous Actions documentation

## What changes were proposed in this pull request?

Add Asynchronous Actions documentation inside the actions section of the programming guide.

## How was this patch tested?

Checked the documentation indentation and formatting with Markdown preview.

Author: sandy

Closes #14104 from phalodi/SPARK-16438.

(cherry picked from commit bf107f1e6522f9138d454b0723089c24626e775a)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a34a5443
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a34a5443
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a34a5443
Branch: refs/heads/branch-2.0
Commit: a34a54435f6af572b33017945dd34a1b5898bf31
Parents: 38787ec
Author: sandy
Authored: Wed Jul 13 11:33:46 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:33:54 2016 +0100

--
docs/programming-guide.md | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a34a5443/docs/programming-guide.md
--
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index 3872aec..2bc4912 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -1101,6 +1101,9 @@ for details.
 
+The Spark RDD API also exposes asynchronous versions of some actions, like `foreachAsync` for `foreach`, which immediately return a `FutureAction` to the caller instead of blocking on completion of the action. This can be used to manage or wait for the asynchronous execution of the action.
+
+
 ### Shuffle operations
 
 Certain operations within Spark trigger an event known as the shuffle. The shuffle is Spark's
spark git commit: [SPARK-16438] Add Asynchronous Actions documentation
Repository: spark
Updated Branches: refs/heads/master 83879ebc5 -> bf107f1e6

[SPARK-16438] Add Asynchronous Actions documentation

## What changes were proposed in this pull request?

Add Asynchronous Actions documentation inside the actions section of the programming guide.

## How was this patch tested?

Checked the documentation indentation and formatting with Markdown preview.

Author: sandy

Closes #14104 from phalodi/SPARK-16438.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf107f1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf107f1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf107f1e
Branch: refs/heads/master
Commit: bf107f1e6522f9138d454b0723089c24626e775a
Parents: 83879eb
Author: sandy
Authored: Wed Jul 13 11:33:46 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:33:46 2016 +0100

--
docs/programming-guide.md | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/bf107f1e/docs/programming-guide.md
--
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index 3872aec..2bc4912 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -1101,6 +1101,9 @@ for details.
 
+The Spark RDD API also exposes asynchronous versions of some actions, like `foreachAsync` for `foreach`, which immediately return a `FutureAction` to the caller instead of blocking on completion of the action. This can be used to manage or wait for the asynchronous execution of the action.
+
+
 ### Shuffle operations
 
 Certain operations within Spark trigger an event known as the shuffle. The shuffle is Spark's
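For readers who want to try the API that this documentation paragraph describes, here is a small sketch, assuming Spark is on the classpath and using a local master for illustration: `countAsync` returns a `FutureAction` immediately, and since `FutureAction` extends `scala.concurrent.Future` it can be awaited, composed, or cancelled:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import scala.concurrent.Await
import scala.concurrent.duration._

object AsyncActionsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("async-demo"))
    val rdd = sc.parallelize(1 to 1000000)

    // Submits the job and returns immediately instead of blocking.
    val future = rdd.countAsync()
    println("Job submitted; the driver thread is free to do other work.")

    // FutureAction extends scala.concurrent.Future, so it can be awaited,
    // composed with map/flatMap, or cancelled via future.cancel().
    val n = Await.result(future, 1.minute)
    println(s"count = $n")
    sc.stop()
  }
}
```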
spark git commit: [SPARK-16439] Fix number formatting in SQL UI
Repository: spark
Updated Branches: refs/heads/branch-2.0 934e2aa4f -> 38787ec93

[SPARK-16439] Fix number formatting in SQL UI

## What changes were proposed in this pull request?

The Spark SQL UI displays numbers greater than 1000 with \u00A0 (a no-break space) as the grouping separator. The problem occurs when the server locale uses a no-break space as its separator (for example pl_PL). This patch turns off grouping and removes the separator.

The problem was introduced by this PR: https://github.com/apache/spark/pull/12425/files#diff-803f475b01acfae1c5c96807c2ea9ddcR125

## How was this patch tested?

Manual UI tests. Screenshot attached.

![image](https://cloud.githubusercontent.com/assets/4006010/16749556/5cb5a372-47cb-11e6-9a95-67fd3f9d1c71.png)

Author: Maciej Brynski

Closes #14142 from maver1ck/master.

(cherry picked from commit 83879ebc5850b74369a5b066c65fa9929bbdb21c)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/38787ec9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/38787ec9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/38787ec9
Branch: refs/heads/branch-2.0
Commit: 38787ec9361bde444ba00cac6822c491acd14fcc
Parents: 934e2aa
Author: Maciej Brynski
Authored: Wed Jul 13 10:50:26 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:50:34 2016 +0100

--
.../scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/38787ec9/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index edfdf7c..9817a56 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -101,7 +101,9 @@ private[sql] object SQLMetrics {
    */
   def stringValue(metricsType: String, values: Seq[Long]): String = {
     if (metricsType == SUM_METRIC) {
-      NumberFormat.getInstance().format(values.sum)
+      val numberFormat = NumberFormat.getInstance()
+      numberFormat.setGroupingUsed(false)
+      numberFormat.format(values.sum)
     } else {
       val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
         Utils.bytesToString
spark git commit: [SPARK-16439] Fix number formatting in SQL UI
Repository: spark
Updated Branches: refs/heads/master f73891e0b -> 83879ebc5

[SPARK-16439] Fix number formatting in SQL UI

## What changes were proposed in this pull request?

The Spark SQL UI displays numbers greater than 1000 with \u00A0 (a no-break space) as the grouping separator. The problem occurs when the server locale uses a no-break space as its separator (for example pl_PL). This patch turns off grouping and removes the separator.

The problem was introduced by this PR: https://github.com/apache/spark/pull/12425/files#diff-803f475b01acfae1c5c96807c2ea9ddcR125

## How was this patch tested?

Manual UI tests. Screenshot attached.

![image](https://cloud.githubusercontent.com/assets/4006010/16749556/5cb5a372-47cb-11e6-9a95-67fd3f9d1c71.png)

Author: Maciej Brynski

Closes #14142 from maver1ck/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83879ebc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83879ebc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83879ebc
Branch: refs/heads/master
Commit: 83879ebc5850b74369a5b066c65fa9929bbdb21c
Parents: f73891e
Author: Maciej Brynski
Authored: Wed Jul 13 10:50:26 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:50:26 2016 +0100

--
.../scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/83879ebc/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index edfdf7c..9817a56 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -101,7 +101,9 @@ private[sql] object SQLMetrics {
    */
   def stringValue(metricsType: String, values: Seq[Long]): String = {
     if (metricsType == SUM_METRIC) {
-      NumberFormat.getInstance().format(values.sum)
+      val numberFormat = NumberFormat.getInstance()
+      numberFormat.setGroupingUsed(false)
+      numberFormat.format(values.sum)
     } else {
       val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
         Utils.bytesToString
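The bug class is easy to reproduce outside Spark: `java.text.NumberFormat` picks the grouping separator from the locale, and in locales such as pl_PL that separator is a no-break space (U+00A0), which shows up as a stray artifact in HTML. A short sketch of the behavior and the fix applied above (the exact separator character depends on the JDK's locale data):

```scala
import java.text.NumberFormat
import java.util.Locale

// With a Polish locale the grouped output typically contains U+00A0
// between digit groups, e.g. "1 234 567" with no-break spaces.
val grouped = NumberFormat.getInstance(Locale.forLanguageTag("pl-PL"))
println(grouped.format(1234567))

// The fix: disable grouping entirely, so the output is locale-independent.
val noGrouping = NumberFormat.getInstance(Locale.forLanguageTag("pl-PL"))
noGrouping.setGroupingUsed(false)
println(noGrouping.format(1234567))  // "1234567" in every locale
```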
spark git commit: [MINOR] Fix Java style errors and remove unused imports
Repository: spark
Updated Branches: refs/heads/branch-2.0 5301efc17 -> 934e2aa4f

[MINOR] Fix Java style errors and remove unused imports

Fix Java style errors and remove unused imports, which were found at random.

Tested on my local machine.

Author: Xin Ren

Closes #14161 from keypointt/SPARK-16437.

(cherry picked from commit f73891e0b9640e14455bdbfd999a8ff10b78a819)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/934e2aa4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/934e2aa4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/934e2aa4
Branch: refs/heads/branch-2.0
Commit: 934e2aa4f8aea409c8814f394f760f5952bd48f1
Parents: 5301efc
Author: Xin Ren
Authored: Wed Jul 13 10:47:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:48:39 2016 +0100
--
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala    | 3 +--
 .../test/scala/org/apache/spark/sql/sources/DataSourceTest.scala | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/934e2aa4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 8cbdaeb..f1c78bb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -778,8 +778,7 @@ private[sql] object ParquetFileFormat extends Logging {
     val assumeBinaryIsString = sparkSession.sessionState.conf.isParquetBinaryAsString
     val assumeInt96IsTimestamp = sparkSession.sessionState.conf.isParquetINT96AsTimestamp
     val writeLegacyParquetFormat = sparkSession.sessionState.conf.writeLegacyParquetFormat
-    val serializedConf =
-      new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())
+    val serializedConf = new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())

     // !! HACK ALERT !!
     //

http://git-wip-us.apache.org/repos/asf/spark/blob/934e2aa4/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
index 206d03e..cc77d3c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.sources

 import org.apache.spark.sql._
-import org.apache.spark.sql.internal.SQLConf

 private[sql] abstract class DataSourceTest extends QueryTest {
spark git commit: [MINOR] Fix Java style errors and remove unused imports
Repository: spark
Updated Branches: refs/heads/master f156136da -> f73891e0b

[MINOR] Fix Java style errors and remove unused imports

## What changes were proposed in this pull request?

Fix Java style errors and remove unused imports, which were found at random. (For example, the `public static final` modifiers on the `MemoryAllocator` interface fields are implicit in Java and therefore redundant.)

## How was this patch tested?

Tested on my local machine.

Author: Xin Ren

Closes #14161 from keypointt/SPARK-16437.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f73891e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f73891e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f73891e0
Branch: refs/heads/master
Commit: f73891e0b9640e14455bdbfd999a8ff10b78a819
Parents: f156136
Author: Xin Ren
Authored: Wed Jul 13 10:47:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:47:07 2016 +0100
--
 .../org/apache/spark/unsafe/memory/HeapMemoryAllocator.java    | 1 -
 .../java/org/apache/spark/unsafe/memory/MemoryAllocator.java   | 6 +++---
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala  | 3 +--
 .../scala/org/apache/spark/sql/sources/DataSourceTest.scala    | 1 -
 4 files changed, 4 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f73891e0/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
--
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
index 3cd4264..3557482 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
@@ -24,7 +24,6 @@ import java.util.LinkedList;
 import java.util.Map;

 import org.apache.spark.unsafe.Platform;
-import org.apache.spark.unsafe.memory.MemoryAllocator;

 /**
  * A simple {@link MemoryAllocator} that can allocate up to 16GB using a JVM long primitive array.

http://git-wip-us.apache.org/repos/asf/spark/blob/f73891e0/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
--
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
index 8bd2b06..7b58868 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
@@ -23,12 +23,12 @@ public interface MemoryAllocator {
    * Whether to fill newly allocated and deallocated memory with 0xa5 and 0x5a bytes respectively.
    * This helps catch misuse of uninitialized or freed memory, but imposes some overhead.
    */
-  public static final boolean MEMORY_DEBUG_FILL_ENABLED = Boolean.parseBoolean(
+  boolean MEMORY_DEBUG_FILL_ENABLED = Boolean.parseBoolean(
     System.getProperty("spark.memory.debugFill", "false"));

   // Same as jemalloc's debug fill values.
-  public static final byte MEMORY_DEBUG_FILL_CLEAN_VALUE = (byte)0xa5;
-  public static final byte MEMORY_DEBUG_FILL_FREED_VALUE = (byte)0x5a;
+  byte MEMORY_DEBUG_FILL_CLEAN_VALUE = (byte)0xa5;
+  byte MEMORY_DEBUG_FILL_FREED_VALUE = (byte)0x5a;

   /**
    * Allocates a contiguous block of memory. Note that the allocated memory is not guaranteed

http://git-wip-us.apache.org/repos/asf/spark/blob/f73891e0/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 76d7f5c..772e031 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -780,8 +780,7 @@ private[sql] object ParquetFileFormat extends Logging {
     val assumeBinaryIsString = sparkSession.sessionState.conf.isParquetBinaryAsString
     val assumeInt96IsTimestamp = sparkSession.sessionState.conf.isParquetINT96AsTimestamp
     val writeLegacyParquetFormat = sparkSession.sessionState.conf.writeLegacyParquetFormat
-    val serializedConf =
-      new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())
+    val serializedConf = new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())

     // !!
spark git commit: [SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks
Repository: spark
Updated Branches: refs/heads/branch-2.0 4b93a833b -> 5301efc17

[SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks

## What changes were proposed in this pull request?

I fixed a misassigned variable: numSkippedTasks was being assigned job.numCompletedTasks in the convertJobData method.

## How was this patch tested?

dev/run-tests

Author: Alex Bozarth

Closes #14141 from ajbozarth/spark16375.

(cherry picked from commit f156136dae5df38f73a25cf3fb48f98f417ef059)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5301efc1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5301efc1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5301efc1
Branch: refs/heads/branch-2.0
Commit: 5301efc1779c9d4a14ea1238bbbfa4aab25d3163
Parents: 4b93a83
Author: Alex Bozarth
Authored: Wed Jul 13 10:45:06 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:45:14 2016 +0100
--
 .../scala/org/apache/spark/status/api/v1/AllJobsResource.scala | 2 +-
 .../job_list_from_multi_attempt_app_json_1__expectation.json   | 2 +-
 .../job_list_from_multi_attempt_app_json_2__expectation.json   | 2 +-
 .../HistoryServerExpectations/job_list_json_expectation.json   | 6 +++---
 .../HistoryServerExpectations/one_job_json_expectation.json    | 2 +-
 .../succeeded_failed_job_list_json_expectation.json            | 6 +++---
 .../succeeded_job_list_json_expectation.json                   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
index b21d36d..d0d9ef1 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
@@ -91,7 +91,7 @@ private[v1] object AllJobsResource {
       numTasks = job.numTasks,
       numActiveTasks = job.numActiveTasks,
       numCompletedTasks = job.numCompletedTasks,
-      numSkippedTasks = job.numCompletedTasks,
+      numSkippedTasks = job.numSkippedTasks,
       numFailedTasks = job.numFailedTasks,
       numActiveStages = job.numActiveStages,
       numCompletedStages = job.completedStageIndices.size,

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
--
diff --git
spark git commit: [SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks
Repository: spark
Updated Branches: refs/heads/master c190d89bd -> f156136da

[SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks

## What changes were proposed in this pull request?

I fixed a misassigned variable: numSkippedTasks was being assigned job.numCompletedTasks in the convertJobData method.

## How was this patch tested?

dev/run-tests

Author: Alex Bozarth

Closes #14141 from ajbozarth/spark16375.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f156136d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f156136d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f156136d
Branch: refs/heads/master
Commit: f156136dae5df38f73a25cf3fb48f98f417ef059
Parents: c190d89
Author: Alex Bozarth
Authored: Wed Jul 13 10:45:06 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:45:06 2016 +0100
--
 .../scala/org/apache/spark/status/api/v1/AllJobsResource.scala | 2 +-
 .../job_list_from_multi_attempt_app_json_1__expectation.json   | 2 +-
 .../job_list_from_multi_attempt_app_json_2__expectation.json   | 2 +-
 .../HistoryServerExpectations/job_list_json_expectation.json   | 6 +++---
 .../HistoryServerExpectations/one_job_json_expectation.json    | 2 +-
 .../succeeded_failed_job_list_json_expectation.json            | 6 +++---
 .../succeeded_job_list_json_expectation.json                   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
index b21d36d..d0d9ef1 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
@@ -91,7 +91,7 @@ private[v1] object AllJobsResource {
       numTasks = job.numTasks,
       numActiveTasks = job.numActiveTasks,
       numCompletedTasks = job.numCompletedTasks,
-      numSkippedTasks = job.numCompletedTasks,
+      numSkippedTasks = job.numSkippedTasks,
       numFailedTasks = job.numFailedTasks,
       numActiveStages = job.numActiveStages,
       numCompletedStages = job.completedStageIndices.size,

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
index
spark git commit: [SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks
Repository: spark
Updated Branches: refs/heads/branch-1.6 980db2bd4 -> fb0933681

[SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks

## What changes were proposed in this pull request?

I fixed a misassigned variable: numSkippedTasks was being assigned job.numCompletedTasks in the convertJobData method.

## How was this patch tested?

dev/run-tests

Author: Alex Bozarth

Closes #14141 from ajbozarth/spark16375.

(cherry picked from commit f156136dae5df38f73a25cf3fb48f98f417ef059)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb093368
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb093368
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb093368
Branch: refs/heads/branch-1.6
Commit: fb0933681db199af85543ccb5601b44a4af92321
Parents: 980db2b
Author: Alex Bozarth
Authored: Wed Jul 13 10:45:06 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:45:24 2016 +0100
--
 .../scala/org/apache/spark/status/api/v1/AllJobsResource.scala | 2 +-
 .../job_list_from_multi_attempt_app_json_1__expectation.json   | 2 +-
 .../job_list_from_multi_attempt_app_json_2__expectation.json   | 2 +-
 .../HistoryServerExpectations/job_list_json_expectation.json   | 6 +++---
 .../HistoryServerExpectations/one_job_json_expectation.json    | 2 +-
 .../succeeded_failed_job_list_json_expectation.json            | 6 +++---
 .../succeeded_job_list_json_expectation.json                   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
index 5783df5..4ac7b3d 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
@@ -86,7 +86,7 @@ private[v1] object AllJobsResource {
       numTasks = job.numTasks,
       numActiveTasks = job.numActiveTasks,
       numCompletedTasks = job.numCompletedTasks,
-      numSkippedTasks = job.numCompletedTasks,
+      numSkippedTasks = job.numSkippedTasks,
       numFailedTasks = job.numFailedTasks,
       numActiveStages = job.numActiveStages,
       numCompletedStages = job.completedStageIndices.size,

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
index 2e92e1f..1940a60 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
index 2e92e1f..1940a60 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
--
diff --git
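For context, a small self-contained Scala sketch of why the misassignment slipped through review. The case classes here are hypothetical stand-ins for Spark's UI job data and the REST API's JobData: both fields share a type, so the wrong wiring still compiles.

case class JobUIData(numCompletedTasks: Int, numSkippedTasks: Int)
case class JobData(numCompletedTasks: Int, numSkippedTasks: Int)

object MisassignedVarDemo {
  def main(args: Array[String]): Unit = {
    val job = JobUIData(numCompletedTasks = 8, numSkippedTasks = 0)

    // Pre-patch wiring: type-checks fine, but silently reports 8 skipped tasks.
    val buggy = JobData(
      numCompletedTasks = job.numCompletedTasks,
      numSkippedTasks = job.numCompletedTasks)

    // Post-patch wiring: each REST API field maps to its UI counterpart.
    val fixed = JobData(
      numCompletedTasks = job.numCompletedTasks,
      numSkippedTasks = job.numSkippedTasks)

    println(s"buggy: $buggy")
    println(s"fixed: $fixed")
  }
}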
spark git commit: [SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest
Repository: spark
Updated Branches: refs/heads/master 772c213ec -> c190d89bd

[SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest

## What changes were proposed in this pull request?

The second form of AssertOnQuery now actually invokes the condition; this also avoids a build warning.

## How was this patch tested?

Jenkins; running StreamTest.

Author: Sean Owen

Closes #14133 from srowen/SPARK-15889.2.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c190d89b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c190d89b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c190d89b
Branch: refs/heads/master
Commit: c190d89bd3cf677400c49238498207b87da9ee78
Parents: 772c213
Author: Sean Owen
Authored: Wed Jul 13 10:44:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:44:07 2016 +0100
--
 .../src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c190d89b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index f949652..af2b581 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -189,7 +189,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts {
   }

   def apply(message: String)(condition: StreamExecution => Unit): AssertOnQuery = {
-    new AssertOnQuery(s => { condition; true }, message)
+    new AssertOnQuery(s => { condition(s); true }, message)
   }
 }
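A minimal standalone Scala sketch of the gotcha being fixed (names hypothetical): naming a function value in statement position is a no-op, so the pre-patch predicate returned true without ever running the condition.

object AssertOnQueryDemo {
  def main(args: Array[String]): Unit = {
    var ran = false
    val condition: String => Unit = _ => ran = true

    // Pre-patch shape: `condition` is merely referenced, never applied, so the
    // predicate is vacuously true (and the compiler emits a warning about it).
    val buggy: String => Boolean = s => { condition; true }
    buggy("stream")
    println(s"ran after buggy form: $ran") // false

    // Post-patch shape: the condition actually runs against the input.
    val fixed: String => Boolean = s => { condition(s); true }
    fixed("stream")
    println(s"ran after fixed form: $ran") // true
  }
}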
spark git commit: [SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest
Repository: spark
Updated Branches: refs/heads/branch-2.0 5173f847c -> 4b93a833b

[SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest

## What changes were proposed in this pull request?

The second form of AssertOnQuery now actually invokes the condition; this also avoids a build warning.

## How was this patch tested?

Jenkins; running StreamTest.

Author: Sean Owen

Closes #14133 from srowen/SPARK-15889.2.

(cherry picked from commit c190d89bd3cf677400c49238498207b87da9ee78)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b93a833
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b93a833
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b93a833
Branch: refs/heads/branch-2.0
Commit: 4b93a833b75d72043fd7770250c25247e690666d
Parents: 5173f84
Author: Sean Owen
Authored: Wed Jul 13 10:44:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:44:15 2016 +0100
--
 .../src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4b93a833/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index f949652..af2b581 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -189,7 +189,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts {
   }

   def apply(message: String)(condition: StreamExecution => Unit): AssertOnQuery = {
-    new AssertOnQuery(s => { condition; true }, message)
+    new AssertOnQuery(s => { condition(s); true }, message)
   }
 }
spark git commit: [SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples
Repository: spark
Updated Branches: refs/heads/branch-2.0 41df62c59 -> 5173f847c

[SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples

- Hard-coded Spark SQL sample snippets were moved into source files under the examples sub-project.
- Inconsistencies between the Scala and Java Spark SQL examples were removed.
- The Scala and Java Spark SQL examples were updated.

The work is still in progress. All involved examples were tested manually. An additional round of testing will be done after the code review.

![image](https://cloud.githubusercontent.com/assets/6235869/16710314/51851606-462a-11e6-9fbe-0818daef65e4.png)

Author: aokolnychyi

Closes #14119 from aokolnychyi/spark_16303.

(cherry picked from commit 772c213ec702c80d0f25aa6f30b2dffebfbe2d0d)
Signed-off-by: Cheng Lian

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5173f847
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5173f847
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5173f847
Branch: refs/heads/branch-2.0
Commit: 5173f847c55a7b810d1c494c8b23c740ba110c39
Parents: 41df62c
Author: aokolnychyi
Authored: Wed Jul 13 16:12:05 2016 +0800
Committer: Cheng Lian
Committed: Wed Jul 13 16:12:51 2016 +0800
--
 docs/sql-programming-guide.md                   | 572 ++-
 .../apache/spark/examples/sql/JavaSparkSQL.java | 186 --
 .../spark/examples/sql/JavaSparkSqlExample.java | 336 +++
 .../examples/sql/JavaSqlDataSourceExample.java  | 217 +++
 .../examples/sql/hive/JavaSparkHiveExample.java | 131 +
 .../spark/examples/sql/SparkSqlExample.scala    | 254
 .../examples/sql/SqlDataSourceExample.scala     | 148 +
 .../spark/examples/sql/hive/HiveFromSpark.scala |  83 ---
 .../examples/sql/hive/SparkHiveExample.scala    | 107
 9 files changed, 1228 insertions(+), 806 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5173f847/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 448251c..f5d1fee 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -65,14 +65,14 @@ Throughout this document, we will often refer to Scala/Java Datasets of `Row`s a
 The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}
+{% include_example init_session scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

 The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSQL.java %}
+{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -105,14 +105,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession.
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Displays the content of the DataFrame to stdout
-df.show()
-{% endhighlight %}
-
+{% include_example create_df scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

@@ -121,14 +114,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight java %}
-SparkSession spark = ...; // An existing SparkSession.
-Dataset<Row> df = spark.read().json("examples/src/main/resources/people.json");
-
-// Displays the content of the DataFrame to stdout
-df.show();
-{% endhighlight %}
-
+{% include_example create_df java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -169,110 +155,20 @@ Here we include some basic examples of structured data processing using Datasets

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession
-
-// Create the DataFrame
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Show the content of the DataFrame
-df.show()
-// age  name
-// null Michael
-// 30   Andy
-// 19   Justin
-
-// Print the schema in a tree format
-df.printSchema()
-// root
-// |-- age: long (nullable = true)
-// |-- name: string (nullable = true)
-
-// Select only the "name" column
spark git commit: [SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples
Repository: spark
Updated Branches: refs/heads/master 1c58fa905 -> 772c213ec

[SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples

- Hard-coded Spark SQL sample snippets were moved into source files under the examples sub-project.
- Inconsistencies between the Scala and Java Spark SQL examples were removed.
- The Scala and Java Spark SQL examples were updated.

The work is still in progress. All involved examples were tested manually. An additional round of testing will be done after the code review.

![image](https://cloud.githubusercontent.com/assets/6235869/16710314/51851606-462a-11e6-9fbe-0818daef65e4.png)

Author: aokolnychyi

Closes #14119 from aokolnychyi/spark_16303.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/772c213e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/772c213e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/772c213e
Branch: refs/heads/master
Commit: 772c213ec702c80d0f25aa6f30b2dffebfbe2d0d
Parents: 1c58fa9
Author: aokolnychyi
Authored: Wed Jul 13 16:12:05 2016 +0800
Committer: Cheng Lian
Committed: Wed Jul 13 16:12:11 2016 +0800
--
 docs/sql-programming-guide.md                   | 572 ++-
 .../apache/spark/examples/sql/JavaSparkSQL.java | 186 --
 .../spark/examples/sql/JavaSparkSqlExample.java | 336 +++
 .../examples/sql/JavaSqlDataSourceExample.java  | 217 +++
 .../examples/sql/hive/JavaSparkHiveExample.java | 131 +
 .../spark/examples/sql/SparkSqlExample.scala    | 254
 .../examples/sql/SqlDataSourceExample.scala     | 148 +
 .../spark/examples/sql/hive/HiveFromSpark.scala |  83 ---
 .../examples/sql/hive/SparkHiveExample.scala    | 107
 9 files changed, 1228 insertions(+), 806 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/772c213e/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index e838a13..2076b29 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -65,14 +65,14 @@ Throughout this document, we will often refer to Scala/Java Datasets of `Row`s a
 The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}
+{% include_example init_session scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

 The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSQL.java %}
+{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -105,14 +105,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession.
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Displays the content of the DataFrame to stdout
-df.show()
-{% endhighlight %}
-
+{% include_example create_df scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

@@ -121,14 +114,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight java %}
-SparkSession spark = ...; // An existing SparkSession.
-Dataset<Row> df = spark.read().json("examples/src/main/resources/people.json");
-
-// Displays the content of the DataFrame to stdout
-df.show();
-{% endhighlight %}
-
+{% include_example create_df java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -169,110 +155,20 @@ Here we include some basic examples of structured data processing using Datasets

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession
-
-// Create the DataFrame
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Show the content of the DataFrame
-df.show()
-// age  name
-// null Michael
-// 30   Andy
-// 19   Justin
-
-// Print the schema in a tree format
-df.printSchema()
-// root
-// |-- age: long (nullable = true)
-// |-- name: string (nullable = true)
-
-// Select only the "name" column
-df.select("name").show()
-// name
-// Michael
-// Andy
-// Justin
-
-// Select everybody, but increment the age by 1
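For reference, a sketch of how the moved snippets are marked up in the new example sources so the guide can pull them in with {% include_example %}. The $example on/off$ delimiter comments reflect my understanding of Spark's include_example Jekyll plugin convention and should be checked against the repository:

import org.apache.spark.sql.SparkSession

object SparkSqlExampleSketch {
  def main(args: Array[String]): Unit = {
    // $example on:init_session$
    val spark = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .getOrCreate()
    // $example off:init_session$

    // $example on:create_df$
    val df = spark.read.json("examples/src/main/resources/people.json")
    // Displays the content of the DataFrame to stdout
    df.show()
    // $example off:create_df$

    spark.stop()
  }
}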
spark git commit: [HOTFIX] Fix build break.
Repository: spark
Updated Branches: refs/heads/branch-1.6 7c8a399a2 -> 980db2bd4

[HOTFIX] Fix build break.

(The SPARK-16489 backport brought ExpressionEvalHelperSuite over with the 2.x codegen API; this hotfix rewrites it against branch-1.6's older CodeGenContext / GeneratedExpressionCode API so the branch compiles again.)

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/980db2bd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/980db2bd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/980db2bd
Branch: refs/heads/branch-1.6
Commit: 980db2bd491398ac4d6db3a4550f1a377b6bf577
Parents: 7c8a399
Author: Reynold Xin
Authored: Tue Jul 12 23:40:37 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:40:37 2016 -0700
--
 .../expressions/ExpressionEvalHelperSuite.scala | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/980db2bd/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
index 64b65e2..a176fd8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
 import org.apache.spark.sql.types.{DataType, IntegerType}

 /**
@@ -43,12 +43,13 @@ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper
 case class BadCodegenExpression() extends LeafExpression {
   override def nullable: Boolean = false
   override def eval(input: InternalRow): Any = 10
-  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    ev.copy(code =
-      s"""
-        |int some_variable = 11;
-        |int ${ev.value} = 10;
-      """.stripMargin)
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    s"""
+      |int some_variable = 11;
+      |int ${ev.value} = 10;
+    """.stripMargin
   }
+
   override def dataType: DataType = IntegerType
 }
spark git commit: [SPARK-16489][SQL] Guard against variable reuse mistakes in expression code generation
Repository: spark
Updated Branches: refs/heads/branch-1.6 d1c992fea -> 7c8a399a2

[SPARK-16489][SQL] Guard against variable reuse mistakes in expression code generation

In code generation, it is incorrect to reuse variable names across different instances of the same expression. As an example, SPARK-16488 reports a bug in which the pmod expression reuses the variable name "r".

This patch updates the ExpressionEvalHelper test harness to always project two instances of the same expression, which will help us catch variable-reuse problems in expression unit tests. This patch also fixes the bug in the crc32 expression.

This is a test harness change, but I also created a new test suite for testing the test harness.

Author: Reynold Xin

Closes #14146 from rxin/SPARK-16489.

(cherry picked from commit c377e49e38a290e5c4fbc178278069788674dfb7)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c8a399a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c8a399a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c8a399a
Branch: refs/heads/branch-1.6
Commit: 7c8a399a292de113ebec4235ebe21c9a8fb85256
Parents: d1c992f
Author: Reynold Xin
Authored: Tue Jul 12 10:07:23 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:14:17 2016 -0700
--
 .../spark/sql/catalyst/expressions/misc.scala   |  7 +--
 .../expressions/ExpressionEvalHelper.scala      | 15 --
 .../expressions/ExpressionEvalHelperSuite.scala | 54
 3 files changed, 68 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7c8a399a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 0f6d02f..cf9403c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -151,11 +151,12 @@ case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInp

   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
     val CRC32 = "java.util.zip.CRC32"
+    val checksum = ctx.freshName("checksum")
     nullSafeCodeGen(ctx, ev, value => {
       s"""
-        $CRC32 checksum = new $CRC32();
-        checksum.update($value, 0, $value.length);
-        ${ev.value} = checksum.getValue();
+        $CRC32 $checksum = new $CRC32();
+        $checksum.update($value, 0, $value.length);
+        ${ev.value} = $checksum.getValue();
       """
     })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c8a399a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
index 074785e..9f463c5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
@@ -160,9 +160,13 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {
       expression: Expression,
       expected: Any,
       inputRow: InternalRow = EmptyRow): Unit = {
-
+    // SPARK-16489 Explicitly doing code generation twice so code gen will fail if
+    // some expression is reusing variable names across different instances.
+    // This behavior is tested in ExpressionEvalHelperSuite.
     val plan = generateProject(
-      GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil),
+      GenerateUnsafeProjection.generate(
+        Alias(expression, s"Optimized($expression)1")() ::
+          Alias(expression, s"Optimized($expression)2")() :: Nil),
       expression)

     val unsafeRow = plan(inputRow)
@@ -170,13 +174,14 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {

     if (expected == null) {
       if (!unsafeRow.isNullAt(0)) {
-        val expectedRow = InternalRow(expected)
+        val expectedRow = InternalRow(expected, expected)
         fail("Incorrect evaluation in unsafe mode: " +
           s"$expression, actual: $unsafeRow, expected: $expectedRow$input")
       }
     } else {
-      val lit = InternalRow(expected)
-      val
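To make the failure mode concrete, a self-contained Scala sketch (all names hypothetical; freshName stands in for CodeGenContext.freshName) of why a fixed local name breaks once checkEvaluation projects the expression twice:

object FreshNameDemo {
  private var counter = 0
  // Stand-in for CodeGenContext.freshName: append a unique suffix per call.
  def freshName(base: String): String = { counter += 1; s"$base$counter" }

  // Buggy codegen template: every instance emits the same local name "r".
  def badGen(resultVar: String): String =
    s"""long r = computeChecksum();
       |long $resultVar = r;""".stripMargin

  // Fixed codegen template: each instance draws a fresh name.
  def goodGen(resultVar: String): String = {
    val r = freshName("r")
    s"""long $r = computeChecksum();
       |long $resultVar = $r;""".stripMargin
  }

  def main(args: Array[String]): Unit = {
    // Projecting the same expression twice, as the hardened harness now does:
    println(Seq(badGen("value1"), badGen("value2")).mkString("\n"))
    // -> declares `long r` twice in one method; Janino rejects the generated
    //    Java with "Redefinition of local variable r"
    println(Seq(goodGen("value1"), goodGen("value2")).mkString("\n"))
    // -> each instance gets r1, r2, ... and the generated code compiles
  }
}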
spark git commit: [SPARK-16488] Fix codegen variable namespace collision in pmod and partitionBy
Repository: spark
Updated Branches: refs/heads/branch-1.6 9808735e0 -> d1c992fea

[SPARK-16488] Fix codegen variable namespace collision in pmod and partitionBy

This patch fixes a variable namespace collision bug in pmod and partitionBy.

Tested with a regression test for one possible occurrence. A more general fix in `ExpressionEvalHelper.checkEvaluation` will be in a subsequent PR.

Author: Sameer Agarwal

Closes #14144 from sameeragarwal/codegen-bug.

(cherry picked from commit 9cc74f95edb6e4f56151966139cd0dc24e377949)
Signed-off-by: Reynold Xin
(cherry picked from commit 689261465ad1dd443ebf764ad837243418b986ef)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d1c992fe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d1c992fe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d1c992fe
Branch: refs/heads/branch-1.6
Commit: d1c992fea3e5999a3494c398f5040d6102f30aff
Parents: 9808735
Author: Sameer Agarwal
Authored: Mon Jul 11 20:26:01 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:13:19 2016 -0700
--
 .../sql/catalyst/expressions/arithmetic.scala | 25 ++--
 1 file changed, 13 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d1c992fe/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 61a17fd..cfae285 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -445,34 +445,35 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic {

   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
     nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+      val remainder = ctx.freshName("remainder")
       dataType match {
         case dt: DecimalType =>
           val decimalAdd = "$plus"
           s"""
-            ${ctx.javaType(dataType)} r = $eval1.remainder($eval2);
-            if (r.compare(new org.apache.spark.sql.types.Decimal().set(0)) < 0) {
-              ${ev.value} = (r.$decimalAdd($eval2)).remainder($eval2);
+            ${ctx.javaType(dataType)} $remainder = $eval1.remainder($eval2);
+            if ($remainder.compare(new org.apache.spark.sql.types.Decimal().set(0)) < 0) {
+              ${ev.value} = ($remainder.$decimalAdd($eval2)).remainder($eval2);
             } else {
-              ${ev.value} = r;
+              ${ev.value} = $remainder;
             }
           """
         // byte and short are casted into int when add, minus, times or divide
         case ByteType | ShortType =>
           s"""
-            ${ctx.javaType(dataType)} r = (${ctx.javaType(dataType)})($eval1 % $eval2);
-            if (r < 0) {
-              ${ev.value} = (${ctx.javaType(dataType)})((r + $eval2) % $eval2);
+            ${ctx.javaType(dataType)} $remainder = (${ctx.javaType(dataType)})($eval1 % $eval2);
+            if ($remainder < 0) {
+              ${ev.value} = (${ctx.javaType(dataType)})(($remainder + $eval2) % $eval2);
             } else {
-              ${ev.value} = r;
+              ${ev.value} = $remainder;
             }
           """
         case _ =>
           s"""
-            ${ctx.javaType(dataType)} r = $eval1 % $eval2;
-            if (r < 0) {
-              ${ev.value} = (r + $eval2) % $eval2;
+            ${ctx.javaType(dataType)} $remainder = $eval1 % $eval2;
+            if ($remainder < 0) {
+              ${ev.value} = ($remainder + $eval2) % $eval2;
             } else {
-              ${ev.value} = r;
+              ${ev.value} = $remainder;
             }
           """
       }
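The arithmetic the generated templates implement is an ordinary positive modulus. A minimal Scala sketch mirroring the default case of the patched template, for reference:

object PmodDemo {
  // pmod: a remainder that is always non-negative when the divisor is positive,
  // unlike the % operator.
  def pmod(a: Long, n: Long): Long = {
    val r = a % n
    if (r < 0) (r + n) % n else r
  }

  def main(args: Array[String]): Unit = {
    println(-7L % 3L)      // -1
    println(pmod(-7L, 3L)) // 2
    println(pmod(7L, 3L))  // 1
  }
}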
spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Repository: spark
Updated Branches: refs/heads/branch-1.6 702178d1f -> 9808735e0

[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegExpExtract and RegExpReplace currently crash on non-nullable input due to the use of a hard-coded local variable name (e.g. compilation fails with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m"`). This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9808735e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9808735e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9808735e
Branch: refs/heads/branch-1.6
Commit: 9808735e0ce91c68df4c1ce82c44543995d44aed
Parents: 702178d
Author: Eric Liang
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:09:31 2016 -0700
--
 .../expressions/regexpExpressions.scala      | 48 ++--
 .../expressions/StringExpressionsSuite.scala |  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/9808735e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 9e484c5..154c7a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -104,10 +104,11 @@ case class Like(left: Expression, right: Expression)
       """
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr));
           ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
         """
       })
@@ -152,10 +153,11 @@ case class RLike(left: Expression, right: Expression)
       """
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($rightStr);
           ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
         """
       })
@@ -248,6 +250,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     val classNamePattern = classOf[Pattern].getCanonicalName
     val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName

+    val matcher = ctx.freshName("matcher")
+
     ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
     ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
     ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
@@ -256,6 +260,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     ctx.addMutableState(classNameStringBuffer,
       termResult, s"${termResult} = new $classNameStringBuffer();")

+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
     nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
       s"""
       if (!$regexp.equals(${termLastRegex})) {
@@ -269,14 +279,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
         ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
       }
       ${termResult}.delete(0, ${termResult}.length());
-      java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+      java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString());

-      while (m.find()) {
-        m.appendReplacement(${termResult},
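A hedged reproduction sketch against the Spark 2.x API (illustrative only; branch-1.6 entry points differ, and whether this exact query trips the bug depends on the planned projection): two regexp_replace calls over non-nullable input in one projection, the shape that made the pre-patch generated Java declare the local "m" twice.

import org.apache.spark.sql.SparkSession

object RegexCodegenRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("regex-codegen-repro")
      .master("local[1]")
      .getOrCreate()

    // range(1).id is a non-nullable bigint; casting keeps it non-nullable,
    // the case the new setEvNotNull guard handles.
    val df = spark.range(1).selectExpr(
      "regexp_replace(cast(id AS string), '0', 'x') AS r1",
      "regexp_replace(cast(id AS string), '1', 'y') AS r2")

    // Pre-patch this could fail to compile the generated code with
    // "Redefinition of local variable m"; post-patch it runs.
    df.show()
    spark.stop()
  }
}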
spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Repository: spark
Updated Branches: refs/heads/branch-2.0 4303d292b -> 41df62c59

[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegExpExtract and RegExpReplace currently crash on non-nullable input due to the use of a hard-coded local variable name (e.g. compilation fails with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m"`). This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41df62c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41df62c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41df62c5
Branch: refs/heads/branch-2.0
Commit: 41df62c595474d7afda6dbe76a558d8cb3be7ff2
Parents: 4303d29
Author: Eric Liang
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:09:08 2016 -0700
--
 .../expressions/regexpExpressions.scala      | 48 ++--
 .../expressions/StringExpressionsSuite.scala |  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/41df62c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 541b860..be82b3b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -108,10 +108,11 @@ case class Like(left: Expression, right: Expression)
       """)
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr));
           ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
         """
       })
@@ -157,10 +158,11 @@ case class RLike(left: Expression, right: Expression)
       """)
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
      nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($rightStr);
           ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
         """
       })
@@ -259,6 +261,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     val classNamePattern = classOf[Pattern].getCanonicalName
     val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName

+    val matcher = ctx.freshName("matcher")
+
     ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
     ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
     ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
@@ -267,6 +271,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     ctx.addMutableState(classNameStringBuffer,
       termResult, s"${termResult} = new $classNameStringBuffer();")

+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
     nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
       s"""
       if (!$regexp.equals(${termLastRegex})) {
@@ -280,14 +290,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
         ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
       }
       ${termResult}.delete(0, ${termResult}.length());
-      java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+      java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString());

-      while (m.find()) {
-        m.appendReplacement(${termResult},