[2/2] spark git commit: Preparing development version 2.0.1-SNAPSHOT
Preparing development version 2.0.1-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3ebecbb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3ebecbb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3ebecbb

Branch: refs/heads/branch-2.0
Commit: b3ebecbb7a5f2ce55ba0392bc59b26780ff69e13
Parents: 48d1fa3
Author: Patrick Wendell
Authored: Wed Jul 13 22:32:55 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:32:55 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 5f546bb..507ddc7 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 2eaa810..bc3b0fe 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index f068d9d..2fb5835 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index fd22188..07d9f1c 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index a17aba5..5e02efd 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/b3ebecbb/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 0bd8846..e7fc6a2 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
[1/2] spark git commit: Preparing Spark release v2.0.0-rc3
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f6eda6b30 -> b3ebecbb7

Preparing Spark release v2.0.0-rc3

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/48d1fa3e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/48d1fa3e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/48d1fa3e

Branch: refs/heads/branch-2.0
Commit: 48d1fa3e736b2ffbb660839735cbb8867f00fee2
Parents: f6eda6b
Author: Patrick Wendell
Authored: Wed Jul 13 22:32:45 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:32:45 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 507ddc7..5f546bb 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index bc3b0fe..2eaa810 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 2fb5835..f068d9d 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 07d9f1c..fd22188 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 5e02efd..a17aba5 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/48d1fa3e/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index e7fc6a2..0bd8846 100644
---
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.0.0-rc3 [created] 48d1fa3e7
spark git commit: [SPARK-16503] SparkSession should provide Spark version
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 5244f86cf -> f6eda6b30

[SPARK-16503] SparkSession should provide Spark version

## What changes were proposed in this pull request?

This patch enables SparkSession to provide the Spark version.

## How was this patch tested?

Manual test:

```
scala> sc.version
res0: String = 2.1.0-SNAPSHOT

scala> spark.version
res1: String = 2.1.0-SNAPSHOT
```

```
>>> sc.version
u'2.1.0-SNAPSHOT'
>>> spark.version
u'2.1.0-SNAPSHOT'
```

Author: Liwei Lin

Closes #14165 from lw-lin/add-version.

(cherry picked from commit 39c836e976fcae51568bed5ebab28e148383b5d4)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f6eda6b3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f6eda6b3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f6eda6b3

Branch: refs/heads/branch-2.0
Commit: f6eda6b3020a5be0ff898111bf84c71e36e9433b
Parents: 5244f86
Author: Liwei Lin
Authored: Wed Jul 13 22:30:46 2016 -0700
Committer: Reynold Xin
Committed: Wed Jul 13 22:30:52 2016 -0700

----------------------------------------------------------------------
 python/pyspark/sql/session.py                               | 6 ++++++
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala  | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/f6eda6b3/python/pyspark/sql/session.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index a360fbe..594f937 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -234,6 +234,12 @@ class SparkSession(object):

     @property
     @since(2.0)
+    def version(self):
+        """The version of Spark on which this application is running."""
+        return self._jsparkSession.version()
+
+    @property
+    @since(2.0)
     def conf(self):
         """Runtime configuration interface for Spark.

http://git-wip-us.apache.org/repos/asf/spark/blob/f6eda6b3/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index a3fd39d..df0950d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -25,7 +25,7 @@ import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
 import scala.util.control.NonFatal

-import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext}
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
@@ -79,6 +79,13 @@ class SparkSession private(

   sparkContext.assertNotStopped()

+  /**
+   * The version of Spark on which this application is running.
+   *
+   * @since 2.0.0
+   */
+  def version: String = SPARK_VERSION
+
   /* ----------------------- *
    |  Session-related state  |
    * ----------------------- */
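For readers wanting to try the new property, a minimal PySpark sketch (the version gate below is illustrative, not part of the patch):

```python
# Illustrative use of the new `version` property; the feature gate below is
# made up for demonstration and is not from the commit.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

print(spark.version)  # same string as sc.version, e.g. u'2.1.0-SNAPSHOT'

# Gate application behavior on the running Spark version.
major, minor = spark.version.split(".")[:2]
if (int(major), int(minor)) >= (2, 0):
    print("SparkSession-based APIs are available")
```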
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.0.0-rc3 [deleted] 47eb9a621
[2/2] spark git commit: Preparing development version 2.0.1-SNAPSHOT
Preparing development version 2.0.1-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5244f86c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5244f86c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5244f86c

Branch: refs/heads/branch-2.0
Commit: 5244f86cfc8532587b70cf1cc20b0684583b9c23
Parents: 47eb9a6
Author: Patrick Wendell
Authored: Wed Jul 13 22:27:15 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:27:15 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 5f546bb..507ddc7 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 2eaa810..bc3b0fe 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index f068d9d..2fb5835 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index fd22188..07d9f1c 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index a17aba5..5e02efd 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0</version>
+    <version>2.0.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/5244f86c/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 0bd8846..e7fc6a2 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.0.0-rc3 [created] 47eb9a621
[1/2] spark git commit: Preparing Spark release v2.0.0-rc3
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 abb802359 -> 5244f86cf

Preparing Spark release v2.0.0-rc3

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47eb9a62
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47eb9a62
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47eb9a62

Branch: refs/heads/branch-2.0
Commit: 47eb9a621e1ca37ecd31fa03f8bb42b83c366c01
Parents: abb8023
Author: Patrick Wendell
Authored: Wed Jul 13 22:27:07 2016 -0700
Committer: Patrick Wendell
Committed: Wed Jul 13 22:27:07 2016 -0700

----------------------------------------------------------------------
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 34 files changed, 34 insertions(+), 34 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 507ddc7..5f546bb 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index bc3b0fe..2eaa810 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/network-shuffle/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 2fb5835..f068d9d 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/network-yarn/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 07d9f1c..fd22188 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/sketch/pom.xml
----------------------------------------------------------------------
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index 5e02efd..a17aba5 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.1-SNAPSHOT</version>
+    <version>2.0.0</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/47eb9a62/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index e7fc6a2..0bd8846 100644
---
spark git commit: [SPARK-16536][SQL][PYSPARK][MINOR] Expose `sql` in PySpark Shell
Repository: spark
Updated Branches:
  refs/heads/master a5f51e216 -> 9c530576a

[SPARK-16536][SQL][PYSPARK][MINOR] Expose `sql` in PySpark Shell

## What changes were proposed in this pull request?

This PR exposes `sql` in the PySpark shell, like the Scala/R shells, for consistency.

**Background**
 * Scala

```scala
scala> sql("select 1 a")
res0: org.apache.spark.sql.DataFrame = [a: int]
```

 * R

```r
> sql("select 1")
SparkDataFrame[1:int]
```

**Before**
 * Python

```python
>>> sql("select 1 a")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'sql' is not defined
```

**After**
 * Python

```python
>>> sql("select 1 a")
DataFrame[a: int]
```

## How was this patch tested?

Manual.

Author: Dongjoon Hyun

Closes #14190 from dongjoon-hyun/SPARK-16536.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9c530576
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9c530576
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9c530576

Branch: refs/heads/master
Commit: 9c530576a44cbeb956db94e7fdd1fad50bd62973
Parents: a5f51e2
Author: Dongjoon Hyun
Authored: Wed Jul 13 22:24:26 2016 -0700
Committer: Reynold Xin
Committed: Wed Jul 13 22:24:26 2016 -0700

----------------------------------------------------------------------
 python/pyspark/shell.py | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/9c530576/python/pyspark/shell.py
----------------------------------------------------------------------
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index ac5ce87..c1917d2 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -49,6 +49,7 @@ except TypeError:

 spark = SparkSession.builder.getOrCreate()
 sc = spark.sparkContext
+sql = spark.sql

 atexit.register(lambda: sc.stop())

 # for compatibility
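Standalone scripts run through `spark-submit` do not get the shell's predefined globals, so the equivalent binding has to be set up by hand; a minimal sketch (script contents are illustrative):

```python
# Equivalent setup in a standalone script; the globals (spark, sc, sql) are
# only predefined in the interactive pyspark shell.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext
sql = spark.sql  # the same one-line binding this patch adds to pyspark/shell.py

sql("select 1 a").show()
# +---+
# |  a|
# +---+
# |  1|
# +---+
```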
spark git commit: [SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 550d0e7dc -> abb802359

[SPARK-16485][ML][DOC] Fix privacy of GLM members, rename sqlDataTypes for ML, doc fixes

## What changes were proposed in this pull request?

Fixing issues found during 2.0 API checks:
* GeneralizedLinearRegressionModel: linkObj, familyObj, familyAndLink should not be exposed
* sqlDataTypes: name does not follow conventions. Do we need to expose it?
* Evaluator: inconsistent doc between evaluate and isLargerBetter
* MinMaxScaler: math rendering --> hard to make it great, but I'll change it a little
* GeneralizedLinearRegressionSummary: aic doc is incorrect --> will change to use more common name

## How was this patch tested?

Existing unit tests. Docs generated locally. (MinMaxScaler is improved a tiny bit.)

Author: Joseph K. Bradley

Closes #14187 from jkbradley/final-api-check-2.0.

(cherry picked from commit a5f51e21627c1bcfc62829a3a962707abf41a452)
Signed-off-by: Joseph K. Bradley

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/abb80235
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/abb80235
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/abb80235

Branch: refs/heads/branch-2.0
Commit: abb8023599df4a9b5133accf547607eda5ca45d2
Parents: 550d0e7
Author: Joseph K. Bradley
Authored: Wed Jul 13 15:40:44 2016 -0700
Committer: Joseph K. Bradley
Committed: Wed Jul 13 15:40:53 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/ml/evaluation/Evaluator.scala  |  7 ++--
 .../apache/spark/ml/feature/MinMaxScaler.scala  |  4 +--
 .../apache/spark/ml/linalg/SQLDataTypes.scala   | 36 ++++++++++++++++++++
 .../org/apache/spark/ml/linalg/dataTypes.scala  | 35 -------------------
 .../GeneralizedLinearRegression.scala           | 10 +++---
 .../spark/ml/linalg/JavaSQLDataTypesSuite.java  |  2 +-
 .../spark/ml/linalg/SQLDataTypesSuite.scala     |  4 +--
 7 files changed, 51 insertions(+), 47 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
index 5f765c0..dfbc3e5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.Dataset
 abstract class Evaluator extends Params {

   /**
-   * Evaluates model output and returns a scalar metric (larger is better).
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
    *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @param paramMap parameter map that specifies the input columns and output metrics
@@ -42,7 +43,9 @@ abstract class Evaluator extends Params {
   }

   /**
-   * Evaluates the output.
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
+   *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @return metric
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 7b03f0c..9ed8d83 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -78,9 +78,9 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
  * statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
  * feature E is calculated as,
  *
- * Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min
+ * `Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min`
  *
- * For the case E_{max} == E_{min}, Rescaled(e_i) = 0.5 * (max + min)
+ * For the case `E_{max} == E_{min}`, `Rescaled(e_i) = 0.5 * (max + min)`.
  * Note that since zero values will probably be transformed to non-zero values, output of the
  * transformer will be DenseVector even for sparse input.
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/abb80235/mllib/src/main/scala/org/apache/spark/ml/linalg/SQLDataTypes.scala
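The MinMaxScaler formula quoted in the diff is easy to sanity-check by hand; a small pure-Python sketch (not from the commit):

```python
# Hand-rolled check of the MinMaxScaler rescaling formula from the docs above;
# a sketch for illustration, not Spark code.
def rescale(e, e_min, e_max, lo=0.0, hi=1.0):
    """Rescaled(e_i) = (e_i - E_min) / (E_max - E_min) * (hi - lo) + lo"""
    if e_max == e_min:
        # Degenerate case called out in the docs: Rescaled(e_i) = 0.5 * (hi + lo)
        return 0.5 * (hi + lo)
    return (e - e_min) / (e_max - e_min) * (hi - lo) + lo

feature = [1.0, 3.0, 5.0]
print([rescale(e, min(feature), max(feature)) for e in feature])  # [0.0, 0.5, 1.0]
```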
spark git commit: [SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 9e3a59858 -> 550d0e7dc

[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema

What changes were proposed in this pull request?

If we create a table pointing to a parquet/json dataset without specifying the schema, the describe table command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, describe table does show the schema of such a table.

~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~ For data source tables, we infer the schema before table creation. Thus, this PR sets the inferred schema as the table schema at table creation.

How was this patch tested?

Added test cases

Author: gatorsmile

Closes #14148 from gatorsmile/describeSchema.

(cherry picked from commit c5ec879828369ec1d21acd7f18a792306634ff74)
Signed-off-by: Yin Huai

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/550d0e7d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/550d0e7d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/550d0e7d

Branch: refs/heads/branch-2.0
Commit: 550d0e7dc6339fac0fe3bb5a8d6038681fd3fec3
Parents: 9e3a598
Author: gatorsmile
Authored: Wed Jul 13 15:23:37 2016 -0700
Committer: Yin Huai
Committed: Wed Jul 13 15:23:59 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 28 +---
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 16 ++-
 2 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/550d0e7d/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 1483604..b2300b4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)

+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }

     result
   }

-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }

   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)

     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }

   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table,
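A minimal PySpark sketch of the behavior this patch fixes (table name and path below are hypothetical):

```python
# Hypothetical demo of DESCRIBE on a data source table created without an
# explicit schema; the table name and path are made up.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Write a Parquet dataset, then point a table at it without declaring a schema.
spark.range(10).selectExpr("id", "id * 2 AS doubled") \
    .write.mode("overwrite").parquet("/tmp/describe_demo")
spark.sql("CREATE TABLE demo_tbl USING parquet OPTIONS (path '/tmp/describe_demo')")

# With this patch the inferred columns (id, doubled) are listed instead of
# "# Schema of this table is inferred at runtime".
spark.sql("DESCRIBE demo_tbl").show(truncate=False)
```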
spark git commit: [SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
Repository: spark
Updated Branches:
  refs/heads/master fb2e8eeb0 -> c5ec87982

[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema

What changes were proposed in this pull request?

If we create a table pointing to a parquet/json dataset without specifying the schema, the describe table command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, describe table does show the schema of such a table.

~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~ For data source tables, we infer the schema before table creation. Thus, this PR sets the inferred schema as the table schema at table creation.

How was this patch tested?

Added test cases

Author: gatorsmile

Closes #14148 from gatorsmile/describeSchema.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5ec8798
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5ec8798
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5ec8798

Branch: refs/heads/master
Commit: c5ec879828369ec1d21acd7f18a792306634ff74
Parents: fb2e8ee
Author: gatorsmile
Authored: Wed Jul 13 15:23:37 2016 -0700
Committer: Yin Huai
Committed: Wed Jul 13 15:23:37 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 28 +---
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 16 ++-
 2 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/c5ec8798/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 5c815df..6651c33 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)

+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }

     result
   }

-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }

   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)

     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }

   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)

     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", "", "")
spark git commit: [SPARKR][DOCS][MINOR] R programming guide to include csv data source example
Repository: spark
Updated Branches:
  refs/heads/master b4baf086c -> fb2e8eeb0

[SPARKR][DOCS][MINOR] R programming guide to include csv data source example

## What changes were proposed in this pull request?

Minor documentation update for code example, code style, and missed reference to "sparkR.init"

## How was this patch tested?

manual

shivaram

Author: Felix Cheung

Closes #14178 from felixcheung/rcsvprogrammingguide.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb2e8eeb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb2e8eeb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb2e8eeb

Branch: refs/heads/master
Commit: fb2e8eeb0b1e56bea535165f7a3bec6558b3f4a3
Parents: b4baf08
Author: Felix Cheung
Authored: Wed Jul 13 15:09:23 2016 -0700
Committer: Shivaram Venkataraman
Committed: Wed Jul 13 15:09:23 2016 -0700

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  2 +-
 docs/sparkr.md                            | 27 +-
 2 files changed, 19 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/fb2e8eeb/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index fdd6020..e61fa41 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -237,7 +237,7 @@ test_that("read csv as DataFrame", {
                    "Empty,Dummy,Placeholder")
   writeLines(mockLinesCsv, csvPath)

-  df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.string = "Empty")
+  df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "Empty")
   expect_equal(count(df2), 4)
   withoutna2 <- na.omit(df2, how = "any", cols = "year")
   expect_equal(count(withoutna2), 3)

http://git-wip-us.apache.org/repos/asf/spark/blob/fb2e8eeb/docs/sparkr.md
----------------------------------------------------------------------
diff --git a/docs/sparkr.md b/docs/sparkr.md
index b4acb23..9fda0ec 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -111,19 +111,17 @@ head(df)

 SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. This section describes the general methods for loading and saving data using Data Sources. You can check the Spark SQL programming guide for more [specific options](sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources.

 The general method for creating SparkDataFrames from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active SparkSession will be used automatically. SparkR supports reading JSON, CSV and Parquet files natively and through [Spark Packages](http://spark-packages.org/) you can find data source connectors for popular file formats like [Avro](http://spark-packages.org/package/databricks/spark-avro). These packages can either be added by
-specifying `--packages` with `spark-submit` or `sparkR` commands, or if creating context through `init`
-you can specify the packages with the `packages` argument.
+specifying `--packages` with `spark-submit` or `sparkR` commands, or if initializing SparkSession with `sparkPackages` parameter when in an interactive R shell or from RStudio.

 {% highlight r %}
-sc <- sparkR.session(sparkPackages="com.databricks:spark-avro_2.11:3.0.0")
+sc <- sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0")
 {% endhighlight %}

 We can see how to use data sources using an example JSON input file. Note that the file that is used here is _not_ a typical JSON file. Each line in the file must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail.

-
 {% highlight r %}
 people <- read.df("./examples/src/main/resources/people.json", "json")
 head(people)
@@ -138,6 +136,18 @@ printSchema(people)
 # |-- age: long (nullable = true)
 # |-- name: string (nullable = true)

+# Similarly, multiple files can be read with read.json
+people <- read.json(c("./examples/src/main/resources/people.json", "./examples/src/main/resources/people2.json"))
+
 {% endhighlight %}

+The data sources API natively supports CSV formatted input files. For more information please refer to SparkR [read.df](api/R/read.df.html) API documentation.
+
+{% highlight r %}
+df <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "NA")
+
+{% endhighlight %}

@@ -146,7 +156,7 @@ to a Parquet file using
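For readers following along in Python rather than R, a rough PySpark analogue of the CSV example above (the path is hypothetical):

```python
# Rough PySpark analogue of the SparkR read.df CSV example; the path is made up.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# header / inferSchema / nullValue correspond to the header, inferSchema and
# na.strings arguments in the R example.
df = spark.read.csv("./examples/src/main/resources/people.csv",
                    header=True, inferSchema=True, nullValue="NA")
df.printSchema()
```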
spark git commit: [SPARKR][MINOR] R examples and test updates
Repository: spark
Updated Branches:
  refs/heads/master 51a6706b1 -> b4baf086c

[SPARKR][MINOR] R examples and test updates

## What changes were proposed in this pull request?

Minor example updates

## How was this patch tested?

manual

shivaram

Author: Felix Cheung

Closes #14171 from felixcheung/rexample.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4baf086
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4baf086
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4baf086

Branch: refs/heads/master
Commit: b4baf086ca380a46d953f2710184ad9eee3a045e
Parents: 51a6706
Author: Felix Cheung
Authored: Wed Jul 13 13:33:34 2016 -0700
Committer: Shivaram Venkataraman
Committed: Wed Jul 13 13:33:34 2016 -0700

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/jarTest.R           | 2 +-
 R/pkg/inst/tests/testthat/packageInAJarTest.R | 2 +-
 examples/src/main/r/RSparkSQLExample.R        | 3 +++
 examples/src/main/r/dataframe.R               | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/R/pkg/inst/tests/testthat/jarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R
index 84e4845..51754a4 100644
--- a/R/pkg/inst/tests/testthat/jarTest.R
+++ b/R/pkg/inst/tests/testthat/jarTest.R
@@ -16,7 +16,7 @@
 #
 library(SparkR)

-sparkSession <- sparkR.session()
+sparkR.session()

 helloTest <- SparkR:::callJStatic("sparkR.test.hello",
                                   "helloWorld",

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/R/pkg/inst/tests/testthat/packageInAJarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R
index 940c91f..4bc935c 100644
--- a/R/pkg/inst/tests/testthat/packageInAJarTest.R
+++ b/R/pkg/inst/tests/testthat/packageInAJarTest.R
@@ -17,7 +17,7 @@
 library(SparkR)
 library(sparkPackageTest)

-sparkSession <- sparkR.session()
+sparkR.session()

 run1 <- myfunc(5L)

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/examples/src/main/r/RSparkSQLExample.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index eba3f1b..f20875c 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -195,3 +195,6 @@ results <- collect(sql("FROM src SELECT key, value"))
 # $example on:jdbc$
 df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password")
 # $example off:jdbc$
+
+# Stop the SparkSession now
+sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/b4baf086/examples/src/main/r/dataframe.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 295f9b4..82b85f2 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -18,7 +18,7 @@
 library(SparkR)

 # Initialize SparkSession
-sc <- sparkR.session(appName = "SparkR-DataFrame-example")
+sparkR.session(appName = "SparkR-DataFrame-example")

 # Create a simple local data.frame
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
spark git commit: [SPARKR][MINOR] R examples and test updates
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 86adc5cfb -> 18255a934

[SPARKR][MINOR] R examples and test updates

## What changes were proposed in this pull request?

Minor example updates

## How was this patch tested?

manual

shivaram

Author: Felix Cheung

Closes #14171 from felixcheung/rexample.

(cherry picked from commit b4baf086ca380a46d953f2710184ad9eee3a045e)
Signed-off-by: Shivaram Venkataraman

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/18255a93
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/18255a93
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/18255a93

Branch: refs/heads/branch-2.0
Commit: 18255a9345dd711bf630993c582511efa74b7919
Parents: 86adc5c
Author: Felix Cheung
Authored: Wed Jul 13 13:33:34 2016 -0700
Committer: Shivaram Venkataraman
Committed: Wed Jul 13 13:33:47 2016 -0700

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/jarTest.R           | 2 +-
 R/pkg/inst/tests/testthat/packageInAJarTest.R | 2 +-
 examples/src/main/r/RSparkSQLExample.R        | 3 +++
 examples/src/main/r/dataframe.R               | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/R/pkg/inst/tests/testthat/jarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R
index 84e4845..51754a4 100644
--- a/R/pkg/inst/tests/testthat/jarTest.R
+++ b/R/pkg/inst/tests/testthat/jarTest.R
@@ -16,7 +16,7 @@
 #
 library(SparkR)

-sparkSession <- sparkR.session()
+sparkR.session()

 helloTest <- SparkR:::callJStatic("sparkR.test.hello",
                                   "helloWorld",

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/R/pkg/inst/tests/testthat/packageInAJarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R
index 940c91f..4bc935c 100644
--- a/R/pkg/inst/tests/testthat/packageInAJarTest.R
+++ b/R/pkg/inst/tests/testthat/packageInAJarTest.R
@@ -17,7 +17,7 @@
 library(SparkR)
 library(sparkPackageTest)

-sparkSession <- sparkR.session()
+sparkR.session()

 run1 <- myfunc(5L)

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/examples/src/main/r/RSparkSQLExample.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R
index eba3f1b..f20875c 100644
--- a/examples/src/main/r/RSparkSQLExample.R
+++ b/examples/src/main/r/RSparkSQLExample.R
@@ -195,3 +195,6 @@ results <- collect(sql("FROM src SELECT key, value"))
 # $example on:jdbc$
 df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password")
 # $example off:jdbc$
+
+# Stop the SparkSession now
+sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/18255a93/examples/src/main/r/dataframe.R
----------------------------------------------------------------------
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 295f9b4..82b85f2 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -18,7 +18,7 @@
 library(SparkR)

 # Initialize SparkSession
-sc <- sparkR.session(appName = "SparkR-DataFrame-example")
+sparkR.session(appName = "SparkR-DataFrame-example")

 # Create a simple local data.frame
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
spark git commit: [SPARK-16114][SQL] updated structured streaming guide
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 7de183d97 -> 86adc5cfb

[SPARK-16114][SQL] updated structured streaming guide

## What changes were proposed in this pull request?

Updated structured streaming programming guide with new windowed example.

## How was this patch tested?

Docs

Author: James Thomas

Closes #14183 from jjthomas/ss_docs_update.

(cherry picked from commit 51a6706b1339bb761602e33276a469f71be2cd90)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/86adc5cf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/86adc5cf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/86adc5cf

Branch: refs/heads/branch-2.0
Commit: 86adc5cfbe286eb4d6071ec9ee09b6d0960a8509
Parents: 7de183d
Author: James Thomas
Authored: Wed Jul 13 13:26:23 2016 -0700
Committer: Tathagata Das
Committed: Wed Jul 13 13:26:34 2016 -0700

----------------------------------------------------------------------
 docs/structured-streaming-programming-guide.md | 49 ++---
 1 file changed, 23 insertions(+), 26 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/86adc5cf/docs/structured-streaming-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md
index 7949396..3ef39e4 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -626,52 +626,49 @@ The result tables would look something like the following.

 ![Window Operations](img/structured-streaming-window.png)

-Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations.
+Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations. You can see the full code for the below examples in
+[Scala]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala)/
+[Java]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java)/
+[Python]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py).

 {% highlight scala %}
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count()
+import spark.implicits._

-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal")
+val words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+val windowedCounts = words.groupBy(
+  window($"timestamp", "10 minutes", "5 minutes"),
+  $"word"
+).count()
 {% endhighlight %}

 {% highlight java %}
-import static org.apache.spark.sql.functions.window;
-
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count();
-
-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal");
+Dataset<Row> words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+Dataset<Row> windowedCounts = words.groupBy(
+  functions.window(words.col("timestamp"), "10 minutes", "5 minutes"),
+  words.col("word")
+).count();
 {% endhighlight %}

 {% highlight python %}
-from pyspark.sql.functions import window
-
-# Number of events in every 1 minute time windows
-df.groupBy(window("time", "1 minute")).count()
+words = ...  # streaming DataFrame of schema { timestamp: Timestamp, word: String }

-# Average number of events for each device type in every 1 minute time windows
-df.groupBy("type", window("time", "1 minute")).avg("signal")
+# Group the data by window and word and compute the count of each group
+windowedCounts = words.groupBy(
+    window(words.timestamp, '10 minutes', '5 minutes'),
+    words.word
+).count()
 {% endhighlight %}
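Pulling the fragments above together, a condensed runnable sketch of the windowed word count, along the lines of the linked structured_network_wordcount_windowed.py (host and port are placeholders):

```python
# Condensed windowed word count in the spirit of the example the guide links to;
# assumes a line source such as `nc -lk 9999` on localhost:9999.
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, split, window

spark = SparkSession.builder.appName("WindowedWordCount").getOrCreate()

# Socket source; includeTimestamp attaches an arrival timestamp to each line.
lines = (spark.readStream.format("socket")
         .option("host", "localhost").option("port", 9999)
         .option("includeTimestamp", "true").load())

# Split each line into words, keeping the line's timestamp.
words = lines.select(explode(split(lines.value, " ")).alias("word"),
                     lines.timestamp)

# Count words within 10-minute windows, sliding every 5 minutes.
windowedCounts = words.groupBy(
    window(words.timestamp, "10 minutes", "5 minutes"),
    words.word
).count()

query = (windowedCounts.writeStream.outputMode("complete")
         .format("console").option("truncate", "false").start())
query.awaitTermination()
```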
spark git commit: [SPARK-16114][SQL] updated structured streaming guide
Repository: spark
Updated Branches:
  refs/heads/master 0744d84c9 -> 51a6706b1

[SPARK-16114][SQL] updated structured streaming guide

## What changes were proposed in this pull request?

Updated structured streaming programming guide with new windowed example.

## How was this patch tested?

Docs

Author: James Thomas

Closes #14183 from jjthomas/ss_docs_update.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51a6706b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51a6706b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51a6706b

Branch: refs/heads/master
Commit: 51a6706b1339bb761602e33276a469f71be2cd90
Parents: 0744d84
Author: James Thomas
Authored: Wed Jul 13 13:26:23 2016 -0700
Committer: Tathagata Das
Committed: Wed Jul 13 13:26:23 2016 -0700

----------------------------------------------------------------------
 docs/structured-streaming-programming-guide.md | 49 ++---
 1 file changed, 23 insertions(+), 26 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/51a6706b/docs/structured-streaming-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md
index 7949396..3ef39e4 100644
--- a/docs/structured-streaming-programming-guide.md
+++ b/docs/structured-streaming-programming-guide.md
@@ -626,52 +626,49 @@ The result tables would look something like the following.

 ![Window Operations](img/structured-streaming-window.png)

-Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations.
+Since this windowing is similar to grouping, in code, you can use `groupBy()` and `window()` operations to express windowed aggregations. You can see the full code for the below examples in
+[Scala]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala)/
+[Java]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java)/
+[Python]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py).

 {% highlight scala %}
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count()
+import spark.implicits._

-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal")
+val words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+val windowedCounts = words.groupBy(
+  window($"timestamp", "10 minutes", "5 minutes"),
+  $"word"
+).count()
 {% endhighlight %}

 {% highlight java %}
-import static org.apache.spark.sql.functions.window;
-
-// Number of events in every 1 minute time windows
-df.groupBy(window(df.col("time"), "1 minute"))
-  .count();
-
-// Average number of events for each device type in every 1 minute time windows
-df.groupBy(
-   df.col("type"),
-   window(df.col("time"), "1 minute"))
-  .avg("signal");
+Dataset<Row> words = ... // streaming DataFrame of schema { timestamp: Timestamp, word: String }
+
+// Group the data by window and word and compute the count of each group
+Dataset<Row> windowedCounts = words.groupBy(
+  functions.window(words.col("timestamp"), "10 minutes", "5 minutes"),
+  words.col("word")
+).count();
 {% endhighlight %}

 {% highlight python %}
-from pyspark.sql.functions import window
-
-# Number of events in every 1 minute time windows
-df.groupBy(window("time", "1 minute")).count()
+words = ...  # streaming DataFrame of schema { timestamp: Timestamp, word: String }

-# Average number of events for each device type in every 1 minute time windows
-df.groupBy("type", window("time", "1 minute")).avg("signal")
+# Group the data by window and word and compute the count of each group
+windowedCounts = words.groupBy(
+    window(words.timestamp, '10 minutes', '5 minutes'),
+    words.word
+).count()
 {% endhighlight %}
spark git commit: [SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2e97f3a08 -> 7de183d97

[SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite

## What changes were proposed in this pull request?

It's unnecessary. `QueryTest` already sets it.

Author: Burak Yavuz

Closes #14170 from brkyvz/test-tz.

(cherry picked from commit 0744d84c91d6e494dea77a35e6410bc4b1849e71)
Signed-off-by: Michael Armbrust

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7de183d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7de183d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7de183d9

Branch: refs/heads/branch-2.0
Commit: 7de183d975c1a46bde6a9020b339673d953dd1a1
Parents: 2e97f3a
Author: Burak Yavuz
Authored: Wed Jul 13 12:54:57 2016 -0700
Committer: Michael Armbrust
Committed: Wed Jul 13 12:55:11 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/DataFrameTimeWindowingSuite.scala | 10 --
 1 file changed, 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7de183d9/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
index a15b4e1..4296ec5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
@@ -29,16 +29,6 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSQLContext with B

   import testImplicits._

-  override def beforeEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
-  }
-
-  override def afterEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(null)
-  }
-
   test("tumbling window groupBy statement") {
     val df = Seq(
       ("2016-03-27 19:39:34", 1, "a"),
spark git commit: [SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite
Repository: spark
Updated Branches:
  refs/heads/master 01f09b161 -> 0744d84c9

[SPARK-16531][SQL][TEST] Remove timezone setting from DataFrameTimeWindowingSuite

## What changes were proposed in this pull request?

It's unnecessary. `QueryTest` already sets it.

Author: Burak Yavuz

Closes #14170 from brkyvz/test-tz.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0744d84c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0744d84c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0744d84c

Branch: refs/heads/master
Commit: 0744d84c91d6e494dea77a35e6410bc4b1849e71
Parents: 01f09b1
Author: Burak Yavuz
Authored: Wed Jul 13 12:54:57 2016 -0700
Committer: Michael Armbrust
Committed: Wed Jul 13 12:54:57 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/DataFrameTimeWindowingSuite.scala | 10 --
 1 file changed, 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/0744d84c/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
index a15b4e1..4296ec5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
@@ -29,16 +29,6 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSQLContext with B

   import testImplicits._

-  override def beforeEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
-  }
-
-  override def afterEach(): Unit = {
-    super.beforeEach()
-    TimeZone.setDefault(null)
-  }
-
   test("tumbling window groupBy statement") {
     val df = Seq(
       ("2016-03-27 19:39:34", 1, "a"),
[1/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
Repository: spark
Updated Branches: refs/heads/branch-2.0 90f0e8132 -> 2e97f3a08

http://git-wip-us.apache.org/repos/asf/spark/blob/2e97f3a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index 91edcf2..f1664ce 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -22,7 +22,7 @@ import java.util.Random
 import scala.annotation.tailrec
 import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
@@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
- * :: Experimental ::
- *
  * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
  * by Steinbach, Karypis, and Kumar, with modification to fit Spark.
  * The algorithm starts from a single cluster that contains all points.
@@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel
  * KDD Workshop on Text Mining, 2000.]]
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeans private (
     private var k: Int,
     private var maxIterations: Int,
@@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable {
 }
 
 /**
- * :: Experimental ::
- *
  * Represents a node in a clustering tree.
  *
  * @param index node index, negative for internal nodes and non-negative for leaf nodes
@@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable {
  * @param children children nodes
  */
 @Since("1.6.0")
-@Experimental
 private[clustering] class ClusteringTreeNode private[clustering] (
     val index: Int,
     val size: Long,

http://git-wip-us.apache.org/repos/asf/spark/blob/2e97f3a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 11fd940..8438015 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._
 import org.json4s.JsonDSL._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.Vector
@@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 
 /**
- * :: Experimental ::
- *
  * Clustering model produced by [[BisectingKMeans]].
  * The prediction is done level-by-level from the root node to a leaf node, and at each node among
  * its children the closest to the input point is selected.
@@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession}
  * @param root the root node of the clustering tree
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeansModel private[clustering] (
     private[clustering] val root: ClusteringTreeNode
   ) extends Serializable with Saveable with Logging {

http://git-wip-us.apache.org/repos/asf/spark/blob/2e97f3a0/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index d295826..9ebba1d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -426,13 +426,10 @@ class LocalLDAModel private[spark] (
 }
 
 /**
- * :: Experimental ::
- *
  * Local (non-distributed) model fitted by [[LDA]].
  *
  * This model stores the inferred topics only; it does not store info about the training dataset.
  */
[1/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
Repository: spark
Updated Branches: refs/heads/master d8220c1e5 -> 01f09b161

http://git-wip-us.apache.org/repos/asf/spark/blob/01f09b16/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index 91edcf2..f1664ce 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -22,7 +22,7 @@ import java.util.Random
 import scala.annotation.tailrec
 import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
@@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
- * :: Experimental ::
- *
  * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
  * by Steinbach, Karypis, and Kumar, with modification to fit Spark.
  * The algorithm starts from a single cluster that contains all points.
@@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel
  * KDD Workshop on Text Mining, 2000.]]
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeans private (
     private var k: Int,
     private var maxIterations: Int,
@@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable {
 }
 
 /**
- * :: Experimental ::
- *
  * Represents a node in a clustering tree.
  *
  * @param index node index, negative for internal nodes and non-negative for leaf nodes
@@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable {
  * @param children children nodes
  */
 @Since("1.6.0")
-@Experimental
 private[clustering] class ClusteringTreeNode private[clustering] (
     val index: Int,
     val size: Long,

http://git-wip-us.apache.org/repos/asf/spark/blob/01f09b16/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 11fd940..8438015 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._
 import org.json4s.JsonDSL._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
 import org.apache.spark.mllib.linalg.Vector
@@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 
 /**
- * :: Experimental ::
- *
  * Clustering model produced by [[BisectingKMeans]].
  * The prediction is done level-by-level from the root node to a leaf node, and at each node among
  * its children the closest to the input point is selected.
@@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession}
  * @param root the root node of the clustering tree
  */
 @Since("1.6.0")
-@Experimental
 class BisectingKMeansModel private[clustering] (
     private[clustering] val root: ClusteringTreeNode
   ) extends Serializable with Saveable with Logging {

http://git-wip-us.apache.org/repos/asf/spark/blob/01f09b16/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index d295826..9ebba1d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -426,13 +426,10 @@ class LocalLDAModel private[spark] (
 }
 
 /**
- * :: Experimental ::
- *
  * Local (non-distributed) model fitted by [[LDA]].
  *
  * This model stores the inferred topics only; it does not store info about the training dataset.
  */
-@Experimental
[2/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
[SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML

## What changes were proposed in this pull request?

General decisions to follow, except where noted:

* spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:

* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:

* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:

* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify.

Author: Joseph K. Bradley

Closes #14147 from jkbradley/experimental-audit.

(cherry picked from commit 01f09b161217193b797c8c85969d17054c958615)
Signed-off-by: Joseph K. Bradley

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e97f3a0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e97f3a0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e97f3a0
Branch: refs/heads/branch-2.0
Commit: 2e97f3a08e3b48ce8ad0d669ef844210d0a3d2be
Parents: 90f0e81
Author: Joseph K. Bradley
Authored: Wed Jul 13 12:33:39 2016 -0700
Committer: Joseph K. Bradley
Committed: Wed Jul 13 12:34:15 2016 -0700

--
.../scala/org/apache/spark/ml/Pipeline.scala | 6 +-
.../classification/DecisionTreeClassifier.scala | 7 +--
.../spark/ml/classification/GBTClassifier.scala | 7 +--
.../ml/classification/LogisticRegression.scala | 4 --
.../spark/ml/classification/NaiveBayes.scala | 6 +-
.../spark/ml/classification/OneVsRest.scala | 7 +--
.../classification/RandomForestClassifier.scala | 7 +--
.../org/apache/spark/ml/feature/Binarizer.scala | 4 +-
.../apache/spark/ml/feature/Bucketizer.scala | 4 +-
.../apache/spark/ml/feature/ChiSqSelector.scala | 6 +-
.../spark/ml/feature/CountVectorizer.scala | 6 +-
.../scala/org/apache/spark/ml/feature/DCT.scala | 4 +-
.../spark/ml/feature/ElementwiseProduct.scala | 4 +-
.../org/apache/spark/ml/feature/HashingTF.scala | 4 +-
.../scala/org/apache/spark/ml/feature/IDF.scala | 6 +-
.../apache/spark/ml/feature/Interaction.scala | 4 +-
.../apache/spark/ml/feature/LabeledPoint.scala | 2 +
.../apache/spark/ml/feature/MinMaxScaler.scala | 6 +-
.../org/apache/spark/ml/feature/NGram.scala | 4 +-
.../apache/spark/ml/feature/Normalizer.scala | 4 +-
.../apache/spark/ml/feature/OneHotEncoder.scala | 4 +-
.../scala/org/apache/spark/ml/feature/PCA.scala | 7 +--
.../spark/ml/feature/PolynomialExpansion.scala | 4 +-
.../spark/ml/feature/QuantileDiscretizer.scala | 4 +-
.../spark/ml/feature/SQLTransformer.scala | 4 +-
.../spark/ml/feature/StandardScaler.scala | 6 +-
.../spark/ml/feature/StopWordsRemover.scala | 4 +-
.../apache/spark/ml/feature/StringIndexer.scala | 8 +--
.../org/apache/spark/ml/feature/Tokenizer.scala | 6 +-
.../spark/ml/feature/VectorAssembler.scala | 4 +-
.../apache/spark/ml/feature/VectorIndexer.scala | 6 +-
.../apache/spark/ml/feature/VectorSlicer.scala | 4 +-
.../org/apache/spark/ml/feature/Word2Vec.scala | 7 +--
.../org/apache/spark/ml/param/params.scala | 9 +--
.../apache/spark/ml/recommendation/ALS.scala | 8 +--
.../ml/regression/DecisionTreeRegressor.scala | 7 +--
.../spark/ml/regression/GBTRegressor.scala | 6 --
.../ml/regression/IsotonicRegression.scala | 6 +-
.../spark/ml/regression/LinearRegression.scala | 4 --
.../ml/regression/RandomForestRegressor.scala | 7 +--
.../scala/org/apache/spark/ml/tree/Node.scala | 10 +--
.../scala/org/apache/spark/ml/tree/Split.scala | 8 +--
.../apache/spark/ml/tuning/CrossValidator.scala | 6 +-
.../spark/ml/tuning/ParamGridBuilder.scala | 4 +-
[2/2] spark git commit: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
[SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML

## What changes were proposed in this pull request?

General decisions to follow, except where noted:

* spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:

* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:

* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:

* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify.

Author: Joseph K. Bradley

Closes #14147 from jkbradley/experimental-audit.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01f09b16
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01f09b16
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01f09b16
Branch: refs/heads/master
Commit: 01f09b161217193b797c8c85969d17054c958615
Parents: d8220c1
Author: Joseph K. Bradley
Authored: Wed Jul 13 12:33:39 2016 -0700
Committer: Joseph K. Bradley
Committed: Wed Jul 13 12:33:39 2016 -0700

--
.../scala/org/apache/spark/ml/Pipeline.scala | 6 +-
.../classification/DecisionTreeClassifier.scala | 7 +--
.../spark/ml/classification/GBTClassifier.scala | 7 +--
.../ml/classification/LogisticRegression.scala | 4 --
.../spark/ml/classification/NaiveBayes.scala | 6 +-
.../spark/ml/classification/OneVsRest.scala | 7 +--
.../classification/RandomForestClassifier.scala | 7 +--
.../org/apache/spark/ml/feature/Binarizer.scala | 4 +-
.../apache/spark/ml/feature/Bucketizer.scala | 4 +-
.../apache/spark/ml/feature/ChiSqSelector.scala | 6 +-
.../spark/ml/feature/CountVectorizer.scala | 6 +-
.../scala/org/apache/spark/ml/feature/DCT.scala | 4 +-
.../spark/ml/feature/ElementwiseProduct.scala | 4 +-
.../org/apache/spark/ml/feature/HashingTF.scala | 4 +-
.../scala/org/apache/spark/ml/feature/IDF.scala | 6 +-
.../apache/spark/ml/feature/Interaction.scala | 4 +-
.../apache/spark/ml/feature/LabeledPoint.scala | 2 +
.../apache/spark/ml/feature/MinMaxScaler.scala | 6 +-
.../org/apache/spark/ml/feature/NGram.scala | 4 +-
.../apache/spark/ml/feature/Normalizer.scala | 4 +-
.../apache/spark/ml/feature/OneHotEncoder.scala | 4 +-
.../scala/org/apache/spark/ml/feature/PCA.scala | 7 +--
.../spark/ml/feature/PolynomialExpansion.scala | 4 +-
.../spark/ml/feature/QuantileDiscretizer.scala | 4 +-
.../spark/ml/feature/SQLTransformer.scala | 4 +-
.../spark/ml/feature/StandardScaler.scala | 6 +-
.../spark/ml/feature/StopWordsRemover.scala | 4 +-
.../apache/spark/ml/feature/StringIndexer.scala | 8 +--
.../org/apache/spark/ml/feature/Tokenizer.scala | 6 +-
.../spark/ml/feature/VectorAssembler.scala | 4 +-
.../apache/spark/ml/feature/VectorIndexer.scala | 6 +-
.../apache/spark/ml/feature/VectorSlicer.scala | 4 +-
.../org/apache/spark/ml/feature/Word2Vec.scala | 7 +--
.../org/apache/spark/ml/param/params.scala | 9 +--
.../apache/spark/ml/recommendation/ALS.scala | 8 +--
.../ml/regression/DecisionTreeRegressor.scala | 7 +--
.../spark/ml/regression/GBTRegressor.scala | 6 --
.../ml/regression/IsotonicRegression.scala | 6 +-
.../spark/ml/regression/LinearRegression.scala | 4 --
.../ml/regression/RandomForestRegressor.scala | 7 +--
.../scala/org/apache/spark/ml/tree/Node.scala | 10 +--
.../scala/org/apache/spark/ml/tree/Split.scala | 8 +--
.../apache/spark/ml/tuning/CrossValidator.scala | 6 +-
.../spark/ml/tuning/ParamGridBuilder.scala | 4 +-
.../spark/ml/tuning/TrainValidationSplit.scala | 6 +-
.../mllib/clustering/BisectingKMeans.scala | 8 +---
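The audit itself is mechanical, as the hunks above show: a class graduates from experimental to stable by dropping `@Experimental` while keeping `@Since`. A self-contained sketch of that before/after shape, using stand-in annotation classes rather than the real `org.apache.spark.annotation` sources:

```scala
import scala.annotation.StaticAnnotation

// Stand-ins for org.apache.spark.annotation.Since / Experimental, so this
// sketch compiles on its own; the real ones also drive API doc generation.
class Since(version: String) extends StaticAnnotation
class Experimental extends StaticAnnotation

// Before the audit (as in the diff hunks above):
//   @Since("1.6.0")
//   @Experimental
//   class BisectingKMeans private (...) { ... }

// After the audit, only the version tag remains; the API is now stable:
@Since("1.6.0")
class BisectingKMeans(val k: Int, val maxIterations: Int)
```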
spark git commit: [SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors
Repository: spark
Updated Branches: refs/heads/branch-2.0 7d9bd951b -> 90f0e8132

[SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors

## What changes were proposed in this pull request?

Currently, if `spark.dynamicAllocation.initialExecutors` is less than `spark.dynamicAllocation.minExecutors`, Spark silently picks the minExecutors value without any warning, whereas Spark 1.6 threw an exception for such a configuration. This change adds a warning log when these parameters are configured inconsistently.

## How was this patch tested?

Unit test added to verify the scenario.

Author: jerryshao

Closes #14149 from jerryshao/SPARK-16435.

(cherry picked from commit d8220c1e5e94abbdb9643672b918f0d748206db9)
Signed-off-by: Tom Graves

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90f0e813
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90f0e813
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90f0e813
Branch: refs/heads/branch-2.0
Commit: 90f0e8132bb40158d6d1b6be77e6b512d837466b
Parents: 7d9bd95
Author: jerryshao
Authored: Wed Jul 13 13:24:47 2016 -0500
Committer: Tom Graves
Committed: Wed Jul 13 13:25:05 2016 -0500

--
.../main/scala/org/apache/spark/util/Utils.scala | 19 ++-
.../scala/org/apache/spark/util/UtilsSuite.scala | 3 +++
2 files changed, 21 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/90f0e813/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 156cf17..a79d195 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2338,10 +2338,27 @@ private[spark] object Utils extends Logging {
    * Return the initial number of executors for dynamic allocation.
    */
   def getDynamicAllocationInitialExecutors(conf: SparkConf): Int = {
-    Seq(
+    if (conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    if (conf.get(EXECUTOR_INSTANCES).getOrElse(0) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${EXECUTOR_INSTANCES.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    val initialExecutors = Seq(
       conf.get(DYN_ALLOCATION_MIN_EXECUTORS),
       conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS),
       conf.get(EXECUTOR_INSTANCES).getOrElse(0)).max
+
+    logInfo(s"Using initial executors = $initialExecutors, max of " +
+      s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key}, ${DYN_ALLOCATION_MIN_EXECUTORS.key} and " +
+      s"${EXECUTOR_INSTANCES.key}")
+    initialExecutors
   }
 
   def tryWithResource[R <: Closeable, T](createResource: => R)(f: R => T): T = {

http://git-wip-us.apache.org/repos/asf/spark/blob/90f0e813/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index f5d0fb0..30952a9 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -782,6 +782,9 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
       conf.set("spark.dynamicAllocation.initialExecutors", "3")) === 4)
     assert(Utils.getDynamicAllocationInitialExecutors( // should use initialExecutors
       conf.set("spark.dynamicAllocation.initialExecutors", "5")) === 5)
+    assert(Utils.getDynamicAllocationInitialExecutors( // should use minExecutors
+      conf.set("spark.dynamicAllocation.initialExecutors", "2")
+        .set("spark.executor.instances", "1")) === 3)
   }
spark git commit: [SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors
Repository: spark
Updated Branches: refs/heads/master f376c3726 -> d8220c1e5

[SPARK-16435][YARN][MINOR] Add warning log if initialExecutors is less than minExecutors

## What changes were proposed in this pull request?

Currently, if `spark.dynamicAllocation.initialExecutors` is less than `spark.dynamicAllocation.minExecutors`, Spark silently picks the minExecutors value without any warning, whereas Spark 1.6 threw an exception for such a configuration. This change adds a warning log when these parameters are configured inconsistently.

## How was this patch tested?

Unit test added to verify the scenario.

Author: jerryshao

Closes #14149 from jerryshao/SPARK-16435.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d8220c1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d8220c1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d8220c1e
Branch: refs/heads/master
Commit: d8220c1e5e94abbdb9643672b918f0d748206db9
Parents: f376c37
Author: jerryshao
Authored: Wed Jul 13 13:24:47 2016 -0500
Committer: Tom Graves
Committed: Wed Jul 13 13:24:47 2016 -0500

--
.../main/scala/org/apache/spark/util/Utils.scala | 19 ++-
.../scala/org/apache/spark/util/UtilsSuite.scala | 3 +++
2 files changed, 21 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d8220c1e/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 298e624..2e4ec4c 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2342,10 +2342,27 @@ private[spark] object Utils extends Logging {
    * Return the initial number of executors for dynamic allocation.
    */
   def getDynamicAllocationInitialExecutors(conf: SparkConf): Int = {
-    Seq(
+    if (conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    if (conf.get(EXECUTOR_INSTANCES).getOrElse(0) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) {
+      logWarning(s"${EXECUTOR_INSTANCES.key} less than " +
+        s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " +
+        "please update your configs.")
+    }
+
+    val initialExecutors = Seq(
       conf.get(DYN_ALLOCATION_MIN_EXECUTORS),
       conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS),
       conf.get(EXECUTOR_INSTANCES).getOrElse(0)).max
+
+    logInfo(s"Using initial executors = $initialExecutors, max of " +
+      s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key}, ${DYN_ALLOCATION_MIN_EXECUTORS.key} and " +
+      s"${EXECUTOR_INSTANCES.key}")
+    initialExecutors
   }
 
   def tryWithResource[R <: Closeable, T](createResource: => R)(f: R => T): T = {

http://git-wip-us.apache.org/repos/asf/spark/blob/d8220c1e/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index f5d0fb0..30952a9 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -782,6 +782,9 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
       conf.set("spark.dynamicAllocation.initialExecutors", "3")) === 4)
     assert(Utils.getDynamicAllocationInitialExecutors( // should use initialExecutors
       conf.set("spark.dynamicAllocation.initialExecutors", "5")) === 5)
+    assert(Utils.getDynamicAllocationInitialExecutors( // should use minExecutors
+      conf.set("spark.dynamicAllocation.initialExecutors", "2")
+        .set("spark.executor.instances", "1")) === 3)
   }
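The rule the patch implements is simply "take the max of the three settings, but warn when the max will silently override an explicit value". A standalone sketch of that rule on plain integers follows; the function name and `println`-based warnings are illustrative, not Spark's actual `Utils` code, which reads typed config entries and logs through its logging framework:

```scala
// Standalone sketch of the decision rule added in this patch (simplified).
def dynamicAllocationInitialExecutors(
    minExecutors: Int,
    initialExecutors: Int,
    executorInstances: Int): Int = {
  if (initialExecutors < minExecutors) {
    println(s"Warning: spark.dynamicAllocation.initialExecutors ($initialExecutors) " +
      s"is less than spark.dynamicAllocation.minExecutors ($minExecutors); ignoring it.")
  }
  if (executorInstances < minExecutors) {
    println(s"Warning: spark.executor.instances ($executorInstances) " +
      s"is less than spark.dynamicAllocation.minExecutors ($minExecutors); ignoring it.")
  }
  // The effective value is simply the max of the three settings.
  Seq(minExecutors, initialExecutors, executorInstances).max
}

// Mirrors the new unit test case: min=3, initial=2, instances=1 resolves to 3.
assert(dynamicAllocationInitialExecutors(3, 2, 1) == 3)
```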
spark git commit: [SPARK-16343][SQL] Improve the PushDownPredicate rule to pushdown predicates correctly in non-deterministic condition.
Repository: spark
Updated Branches: refs/heads/master ea06e4ef3 -> f376c3726

[SPARK-16343][SQL] Improve the PushDownPredicate rule to pushdown predicates correctly in non-deterministic condition.

## What changes were proposed in this pull request?

Currently our Optimizer may reorder predicates to run them more efficiently, but when a condition contains non-deterministic expressions, changing the order between the deterministic and non-deterministic parts may change the number of input rows. For example:

```SELECT a FROM t WHERE rand() < 0.1 AND a = 1```

and

```SELECT a FROM t WHERE a = 1 AND rand() < 0.1```

may call rand() a different number of times and therefore produce different output rows.

This PR improves the rule by only pushing down predicates that are placed before any non-deterministic predicates.

## How was this patch tested?

Expanded related testcases in FilterPushdownSuite.

Author: 蒋星博

Closes #14012 from jiangxb1987/ppd.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f376c372
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f376c372
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f376c372
Branch: refs/heads/master
Commit: f376c37268848dbb4b2fb57677e22ef2bf207b49
Parents: ea06e4e
Author: 蒋星博
Authored: Thu Jul 14 00:21:27 2016 +0800
Committer: Cheng Lian
Committed: Thu Jul 14 00:21:27 2016 +0800

--
.../sql/catalyst/optimizer/Optimizer.scala | 44 +---
.../optimizer/FilterPushdownSuite.scala | 8 ++--
2 files changed, 33 insertions(+), 19 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f376c372/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 368e9a5..08fb019 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1128,19 +1128,23 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper {
       project.copy(child = Filter(replaceAlias(condition, aliasMap), grandChild))
 
     // Push [[Filter]] operators through [[Window]] operators. Parts of the predicate that can be
-    // pushed beneath must satisfy the following two conditions:
+    // pushed beneath must satisfy the following conditions:
     // 1. All the expressions are part of window partitioning key. The expressions can be compound.
-    // 2. Deterministic
+    // 2. Deterministic.
+    // 3. Placed before any non-deterministic predicates.
     case filter @ Filter(condition, w: Window)
         if w.partitionSpec.forall(_.isInstanceOf[AttributeReference]) =>
       val partitionAttrs = AttributeSet(w.partitionSpec.flatMap(_.references))
-      val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition { cond =>
-        cond.references.subsetOf(partitionAttrs) && cond.deterministic &&
-          // This is for ensuring all the partitioning expressions have been converted to alias
-          // in Analyzer. Thus, we do not need to check if the expressions in conditions are
-          // the same as the expressions used in partitioning columns.
-          partitionAttrs.forall(_.isInstanceOf[Attribute])
+
+      val (candidates, containingNonDeterministic) =
+        splitConjunctivePredicates(condition).span(_.deterministic)
+
+      val (pushDown, rest) = candidates.partition { cond =>
+        cond.references.subsetOf(partitionAttrs)
       }
+
+      val stayUp = rest ++ containingNonDeterministic
+
       if (pushDown.nonEmpty) {
         val pushDownPredicate = pushDown.reduce(And)
         val newWindow = w.copy(child = Filter(pushDownPredicate, w.child))
@@ -1159,11 +1163,16 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper {
 
       // For each filter, expand the alias and check if the filter can be evaluated using
       // attributes produced by the aggregate operator's child operator.
-      val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition { cond =>
+      val (candidates, containingNonDeterministic) =
+        splitConjunctivePredicates(condition).span(_.deterministic)
+
+      val (pushDown, rest) = candidates.partition { cond =>
         val replaced = replaceAlias(cond, aliasMap)
-        replaced.references.subsetOf(aggregate.child.outputSet) && replaced.deterministic
+        replaced.references.subsetOf(aggregate.child.outputSet)
       }
+
+      val stayUp =
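The heart of the fix is swapping `partition` for `span(_.deterministic)`: only the prefix of conjuncts before the first non-deterministic predicate may be pushed down, because reordering across `rand()` changes how many rows it evaluates. A small self-contained sketch of that splitting behavior, with a toy `Pred` case class standing in for Catalyst expressions:

```scala
// Simplified model of a conjunct, standing in for a Catalyst Expression.
case class Pred(sql: String, deterministic: Boolean)

val conjuncts = Seq(
  Pred("a = 1", deterministic = true),
  Pred("rand() < 0.1", deterministic = false),
  Pred("b = 2", deterministic = true))

// span preserves order: everything up to the first non-deterministic
// predicate is a pushdown candidate; the rest must stay above the filter,
// even deterministic predicates like "b = 2" that follow rand().
val (candidates, containingNonDeterministic) = conjuncts.span(_.deterministic)

assert(candidates.map(_.sql) == Seq("a = 1"))
assert(containingNonDeterministic.map(_.sql) == Seq("rand() < 0.1", "b = 2"))
```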
spark git commit: [SPARK-16469] enhanced simulate multiply
Repository: spark
Updated Branches: refs/heads/branch-2.0 5a71a0501 -> 7d9bd951b

[SPARK-16469] enhanced simulate multiply

## What changes were proposed in this pull request?

We have a use case of multiplying very big sparse matrices: distributed block matrices of about 1000x1000 blocks, where the simulate-multiply step behaves like O(n^4) (n being 1000) and takes about 1.5 hours. We modified it slightly with a classical hashmap, and it now runs in about 30 seconds, O(n^2).

## How was this patch tested?

We have added a performance test and verified the reduced time.

Author: oraviv

Closes #14068 from uzadude/master.

(cherry picked from commit ea06e4ef34c860219a9aeec81816ef53ada96253)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d9bd951
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d9bd951
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d9bd951
Branch: refs/heads/branch-2.0
Commit: 7d9bd951b0b5767ef2c95eb7467f35c9409e7d8c
Parents: 5a71a05
Author: oraviv
Authored: Wed Jul 13 14:47:08 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 14:47:47 2016 +0100

--
.../spark/mllib/linalg/distributed/BlockMatrix.scala | 13 +
1 file changed, 9 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7d9bd951/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 639295c..9782350 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -426,16 +426,21 @@ class BlockMatrix @Since("1.3.0") (
       partitioner: GridPartitioner): (BlockDestinations, BlockDestinations) = {
     val leftMatrix = blockInfo.keys.collect() // blockInfo should already be cached
     val rightMatrix = other.blocks.keys.collect()
+
+    val rightCounterpartsHelper = rightMatrix.groupBy(_._1).mapValues(_.map(_._2))
     val leftDestinations = leftMatrix.map { case (rowIndex, colIndex) =>
-      val rightCounterparts = rightMatrix.filter(_._1 == colIndex)
-      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b._2)))
+      val rightCounterparts = rightCounterpartsHelper.getOrElse(colIndex, Array())
+      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
+    val leftCounterpartsHelper = leftMatrix.groupBy(_._2).mapValues(_.map(_._1))
     val rightDestinations = rightMatrix.map { case (rowIndex, colIndex) =>
-      val leftCounterparts = leftMatrix.filter(_._2 == rowIndex)
-      val partitions = leftCounterparts.map(b => partitioner.getPartition((b._1, colIndex)))
+      val leftCounterparts = leftCounterpartsHelper.getOrElse(rowIndex, Array())
+      val partitions = leftCounterparts.map(b => partitioner.getPartition((b, colIndex)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
     (leftDestinations, rightDestinations)
   }
spark git commit: [SPARK-16469] enhanced simulate multiply
Repository: spark
Updated Branches: refs/heads/master 51ade51a9 -> ea06e4ef3

[SPARK-16469] enhanced simulate multiply

## What changes were proposed in this pull request?

We have a use case of multiplying very big sparse matrices: distributed block matrices of about 1000x1000 blocks, where the simulate-multiply step behaves like O(n^4) (n being 1000) and takes about 1.5 hours. We modified it slightly with a classical hashmap, and it now runs in about 30 seconds, O(n^2).

## How was this patch tested?

We have added a performance test and verified the reduced time.

Author: oraviv

Closes #14068 from uzadude/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea06e4ef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea06e4ef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea06e4ef
Branch: refs/heads/master
Commit: ea06e4ef34c860219a9aeec81816ef53ada96253
Parents: 51ade51
Author: oraviv
Authored: Wed Jul 13 14:47:08 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 14:47:08 2016 +0100

--
.../spark/mllib/linalg/distributed/BlockMatrix.scala | 13 +
1 file changed, 9 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ea06e4ef/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 639295c..9782350 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -426,16 +426,21 @@ class BlockMatrix @Since("1.3.0") (
       partitioner: GridPartitioner): (BlockDestinations, BlockDestinations) = {
     val leftMatrix = blockInfo.keys.collect() // blockInfo should already be cached
     val rightMatrix = other.blocks.keys.collect()
+
+    val rightCounterpartsHelper = rightMatrix.groupBy(_._1).mapValues(_.map(_._2))
     val leftDestinations = leftMatrix.map { case (rowIndex, colIndex) =>
-      val rightCounterparts = rightMatrix.filter(_._1 == colIndex)
-      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b._2)))
+      val rightCounterparts = rightCounterpartsHelper.getOrElse(colIndex, Array())
+      val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
+    val leftCounterpartsHelper = leftMatrix.groupBy(_._2).mapValues(_.map(_._1))
     val rightDestinations = rightMatrix.map { case (rowIndex, colIndex) =>
-      val leftCounterparts = leftMatrix.filter(_._2 == rowIndex)
-      val partitions = leftCounterparts.map(b => partitioner.getPartition((b._1, colIndex)))
+      val leftCounterparts = leftCounterpartsHelper.getOrElse(rowIndex, Array())
+      val partitions = leftCounterparts.map(b => partitioner.getPartition((b, colIndex)))
       ((rowIndex, colIndex), partitions.toSet)
     }.toMap
+
     (leftDestinations, rightDestinations)
   }
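The optimization is a classic index-before-loop rewrite: instead of scanning all right-matrix blocks once per left-matrix block (O(n^4) pairs for an n x n grid of blocks), the block coordinates are grouped into a hash map once, and each block then does an O(1) lookup. A minimal sketch on plain `(rowIndex, colIndex)` pairs, independent of the actual `BlockMatrix` types:

```scala
// Toy 3x3 grids of block coordinates, standing in for the collected keys.
val leftBlocks  = for (r <- 0 until 3; c <- 0 until 3) yield (r, c)
val rightBlocks = for (r <- 0 until 3; c <- 0 until 3) yield (r, c)

// One-time index: right-matrix column indices keyed by row index.
// This replaces the repeated rightBlocks.filter(_._1 == colIndex) scan.
val rightByRow: Map[Int, Seq[Int]] =
  rightBlocks.groupBy(_._1).map { case (row, blocks) => row -> blocks.map(_._2) }

val counterpartsPerBlock = leftBlocks.map { case (rowIndex, colIndex) =>
  // Hash lookup instead of a linear scan over every right block.
  val counterparts = rightByRow.getOrElse(colIndex, Seq.empty)
  ((rowIndex, colIndex), counterparts.map(c => (rowIndex, c)).toSet)
}.toMap

// Each left block (r, c) multiplies against right blocks in row c.
assert(counterpartsPerBlock((0, 1)) == Set((0, 0), (0, 1), (0, 2)))
```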
spark git commit: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs
Repository: spark
Updated Branches: refs/heads/branch-1.6 fb0933681 -> 4381e2121

[SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs

## What changes were proposed in this pull request?

Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup

## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #14153 from srowen/SPARK-16440.

(cherry picked from commit 51ade51a9fd64fc2fe651c505a286e6f29f59d40)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4381e212
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4381e212
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4381e212
Branch: refs/heads/branch-1.6
Commit: 4381e212140102b4bce756146c09e866c7b2d85c
Parents: fb09336
Author: Sean Owen
Authored: Wed Jul 13 11:39:32 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:39:49 2016 +0100

--
.../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4381e212/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 30a1849..c2ed896 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -416,6 +416,9 @@ class Word2Vec extends Serializable with Logging {
       }
     }
     newSentences.unpersist()
+    expTable.unpersist()
+    bcVocab.unpersist()
+    bcVocabHash.unpersist()
 
     val wordArray = vocab.map(_.word)
     new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)
spark git commit: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs
Repository: spark
Updated Branches: refs/heads/master 3d6f679cf -> 51ade51a9

[SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs

## What changes were proposed in this pull request?

Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup

## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #14153 from srowen/SPARK-16440.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51ade51a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51ade51a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51ade51a
Branch: refs/heads/master
Commit: 51ade51a9fd64fc2fe651c505a286e6f29f59d40
Parents: 3d6f679
Author: Sean Owen
Authored: Wed Jul 13 11:39:32 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:39:32 2016 +0100

--
.../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/51ade51a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index f2211df..6b9c8ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -434,6 +434,9 @@ class Word2Vec extends Serializable with Logging {
       bcSyn1Global.unpersist(false)
     }
     newSentences.unpersist()
+    expTable.unpersist()
+    bcVocab.unpersist()
+    bcVocabHash.unpersist()
 
     val wordArray = vocab.map(_.word)
     new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)
spark git commit: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs
Repository: spark
Updated Branches: refs/heads/branch-2.0 74ad486dc -> 5a71a0501

[SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs

## What changes were proposed in this pull request?

Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup

## How was this patch tested?

Jenkins tests

Author: Sean Owen

Closes #14153 from srowen/SPARK-16440.

(cherry picked from commit 51ade51a9fd64fc2fe651c505a286e6f29f59d40)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a71a050
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a71a050
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a71a050
Branch: refs/heads/branch-2.0
Commit: 5a71a05015ac7aabfb6c4aa8753abc87ead20718
Parents: 74ad486
Author: Sean Owen
Authored: Wed Jul 13 11:39:32 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:39:39 2016 +0100

--
.../src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5a71a050/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index f2211df..6b9c8ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -434,6 +434,9 @@ class Word2Vec extends Serializable with Logging {
       bcSyn1Global.unpersist(false)
     }
     newSentences.unpersist()
+    expTable.unpersist()
+    bcVocab.unpersist()
+    bcVocabHash.unpersist()
 
     val wordArray = vocab.map(_.word)
     new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global)
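The leak pattern being fixed is worth spelling out: broadcast variables pin blocks on the driver and executors until `unpersist()` is called or the context shuts down, so a long-lived application that repeatedly calls `fit()` accumulates them. A minimal sketch of the lifecycle, assuming Spark is on the classpath; the local master and toy data are demo values only:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object BroadcastLifecycle {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("bc-demo"))

    // Broadcast once; every task reads vocab.value without reshipping it.
    val vocab = sc.broadcast(Map("spark" -> 0, "word2vec" -> 1))

    val counts = sc.parallelize(Seq("spark", "word2vec", "spark"))
      .map(w => vocab.value(w))
      .countByValue()
    println(counts)

    // Without this call, the broadcast blocks stay cached for the lifetime
    // of the context, which is the OoM reported in SPARK-16440 for long runs.
    vocab.unpersist()
    sc.stop()
  }
}
```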
spark git commit: [MINOR][YARN] Fix code error in yarn-cluster unit test
Repository: spark
Updated Branches: refs/heads/branch-2.0 a34a54435 -> 74ad486dc

[MINOR][YARN] Fix code error in yarn-cluster unit test

## What changes were proposed in this pull request?

Fix code error in yarn-cluster unit test.

## How was this patch tested?

Use existing tests.

Author: sharkd

Closes #14166 from sharkdtu/master.

(cherry picked from commit 3d6f679cfe5945a9f72841727342af39e9410e0a)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/74ad486d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/74ad486d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/74ad486d
Branch: refs/heads/branch-2.0
Commit: 74ad486dc886d8899c79eae8a78622eff05aeab6
Parents: a34a544
Author: sharkd
Authored: Wed Jul 13 11:36:02 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:36:09 2016 +0100

--
.../test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/74ad486d/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
--
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 874e304..1ccd7e5 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -97,7 +97,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
   }
 
   test("run Spark in yarn-cluster mode with different configurations") {
-    testBasicYarnApp(true,
+    testBasicYarnApp(false,
       Map(
         "spark.driver.memory" -> "512m",
         "spark.driver.cores" -> "1",
spark git commit: [MINOR][YARN] Fix code error in yarn-cluster unit test
Repository: spark
Updated Branches: refs/heads/master bf107f1e6 -> 3d6f679cf

[MINOR][YARN] Fix code error in yarn-cluster unit test

## What changes were proposed in this pull request?

Fix code error in yarn-cluster unit test.

## How was this patch tested?

Use existing tests.

Author: sharkd

Closes #14166 from sharkdtu/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d6f679c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d6f679c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d6f679c
Branch: refs/heads/master
Commit: 3d6f679cfe5945a9f72841727342af39e9410e0a
Parents: bf107f1
Author: sharkd
Authored: Wed Jul 13 11:36:02 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:36:02 2016 +0100

--
.../test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3d6f679c/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
--
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 874e304..1ccd7e5 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -97,7 +97,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
   }
 
   test("run Spark in yarn-cluster mode with different configurations") {
-    testBasicYarnApp(true,
+    testBasicYarnApp(false,
       Map(
         "spark.driver.memory" -> "512m",
         "spark.driver.cores" -> "1",
spark git commit: [SPARK-16438] Add Asynchronous Actions documentation
Repository: spark
Updated Branches: refs/heads/branch-2.0 38787ec93 -> a34a54435

[SPARK-16438] Add Asynchronous Actions documentation

## What changes were proposed in this pull request?

Add Asynchronous Actions documentation inside the actions section of the programming guide.

## How was this patch tested?

Checked the documentation indentation and formatting with Markdown preview.

Author: sandy

Closes #14104 from phalodi/SPARK-16438.

(cherry picked from commit bf107f1e6522f9138d454b0723089c24626e775a)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a34a5443
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a34a5443
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a34a5443
Branch: refs/heads/branch-2.0
Commit: a34a54435f6af572b33017945dd34a1b5898bf31
Parents: 38787ec
Author: sandy
Authored: Wed Jul 13 11:33:46 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:33:54 2016 +0100

--
docs/programming-guide.md | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a34a5443/docs/programming-guide.md
--
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index 3872aec..2bc4912 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -1101,6 +1101,9 @@ for details.
 
+The Spark RDD API also exposes asynchronous versions of some actions, like `foreachAsync` for `foreach`, which immediately return a `FutureAction` to the caller instead of blocking on completion of the action. This can be used to manage or wait for the asynchronous execution of the action.
+
+
 ### Shuffle operations
 
 Certain operations within Spark trigger an event known as the shuffle. The shuffle is Spark's
spark git commit: [SPARK-16438] Add Asynchronous Actions documentation
Repository: spark
Updated Branches: refs/heads/master 83879ebc5 -> bf107f1e6

[SPARK-16438] Add Asynchronous Actions documentation

## What changes were proposed in this pull request?

Add Asynchronous Actions documentation inside the actions section of the programming guide.

## How was this patch tested?

Checked the documentation indentation and formatting with Markdown preview.

Author: sandy

Closes #14104 from phalodi/SPARK-16438.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf107f1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf107f1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf107f1e
Branch: refs/heads/master
Commit: bf107f1e6522f9138d454b0723089c24626e775a
Parents: 83879eb
Author: sandy
Authored: Wed Jul 13 11:33:46 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 11:33:46 2016 +0100

--
docs/programming-guide.md | 3 +++
1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/bf107f1e/docs/programming-guide.md
--
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index 3872aec..2bc4912 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -1101,6 +1101,9 @@ for details.
 
+The Spark RDD API also exposes asynchronous versions of some actions, like `foreachAsync` for `foreach`, which immediately return a `FutureAction` to the caller instead of blocking on completion of the action. This can be used to manage or wait for the asynchronous execution of the action.
+
+
 ### Shuffle operations
 
 Certain operations within Spark trigger an event known as the shuffle. The shuffle is Spark's
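For readers who want to try the API that this documentation paragraph describes, here is a small sketch, assuming Spark is on the classpath and using a local master for illustration: `countAsync` returns a `FutureAction` immediately, and since `FutureAction` extends `scala.concurrent.Future` it can be awaited, composed, or cancelled:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import scala.concurrent.Await
import scala.concurrent.duration._

object AsyncActionsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("async-demo"))
    val rdd = sc.parallelize(1 to 1000000)

    // Submits the job and returns immediately instead of blocking.
    val future = rdd.countAsync()
    println("Job submitted; the driver thread is free to do other work.")

    // FutureAction extends scala.concurrent.Future, so it can be awaited,
    // composed with map/flatMap, or cancelled via future.cancel().
    val n = Await.result(future, 1.minute)
    println(s"count = $n")
    sc.stop()
  }
}
```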
spark git commit: [SPARK-16439] Fix number formatting in SQL UI
Repository: spark
Updated Branches: refs/heads/branch-2.0 934e2aa4f -> 38787ec93

[SPARK-16439] Fix number formatting in SQL UI

## What changes were proposed in this pull request?

The Spark SQL UI displays numbers greater than 1000 with \u00A0 (a no-break space) as the grouping separator. The problem occurs when the server locale uses a no-break space as its separator (for example pl_PL). This patch turns off grouping and removes the separator.

The problem was introduced by this PR: https://github.com/apache/spark/pull/12425/files#diff-803f475b01acfae1c5c96807c2ea9ddcR125

## How was this patch tested?

Manual UI tests. Screenshot attached.

![image](https://cloud.githubusercontent.com/assets/4006010/16749556/5cb5a372-47cb-11e6-9a95-67fd3f9d1c71.png)

Author: Maciej Brynski

Closes #14142 from maver1ck/master.

(cherry picked from commit 83879ebc5850b74369a5b066c65fa9929bbdb21c)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/38787ec9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/38787ec9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/38787ec9
Branch: refs/heads/branch-2.0
Commit: 38787ec9361bde444ba00cac6822c491acd14fcc
Parents: 934e2aa
Author: Maciej Brynski
Authored: Wed Jul 13 10:50:26 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:50:34 2016 +0100

--
.../scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/38787ec9/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index edfdf7c..9817a56 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -101,7 +101,9 @@ private[sql] object SQLMetrics {
    */
   def stringValue(metricsType: String, values: Seq[Long]): String = {
     if (metricsType == SUM_METRIC) {
-      NumberFormat.getInstance().format(values.sum)
+      val numberFormat = NumberFormat.getInstance()
+      numberFormat.setGroupingUsed(false)
+      numberFormat.format(values.sum)
     } else {
       val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
         Utils.bytesToString
spark git commit: [SPARK-16439] Fix number formatting in SQL UI
Repository: spark
Updated Branches: refs/heads/master f73891e0b -> 83879ebc5

[SPARK-16439] Fix number formatting in SQL UI

## What changes were proposed in this pull request?

The Spark SQL UI displays numbers greater than 1000 with \u00A0 (a no-break space) as the grouping separator. The problem occurs when the server locale uses a no-break space as its separator (for example pl_PL). This patch turns off grouping and removes the separator.

The problem was introduced by this PR: https://github.com/apache/spark/pull/12425/files#diff-803f475b01acfae1c5c96807c2ea9ddcR125

## How was this patch tested?

Manual UI tests. Screenshot attached.

![image](https://cloud.githubusercontent.com/assets/4006010/16749556/5cb5a372-47cb-11e6-9a95-67fd3f9d1c71.png)

Author: Maciej Brynski

Closes #14142 from maver1ck/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83879ebc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83879ebc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83879ebc
Branch: refs/heads/master
Commit: 83879ebc5850b74369a5b066c65fa9929bbdb21c
Parents: f73891e
Author: Maciej Brynski
Authored: Wed Jul 13 10:50:26 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:50:26 2016 +0100

--
.../scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/83879ebc/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index edfdf7c..9817a56 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -101,7 +101,9 @@ private[sql] object SQLMetrics {
    */
   def stringValue(metricsType: String, values: Seq[Long]): String = {
     if (metricsType == SUM_METRIC) {
-      NumberFormat.getInstance().format(values.sum)
+      val numberFormat = NumberFormat.getInstance()
+      numberFormat.setGroupingUsed(false)
+      numberFormat.format(values.sum)
     } else {
       val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
         Utils.bytesToString
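The bug class is easy to reproduce outside Spark: `java.text.NumberFormat` picks the grouping separator from the locale, and in locales such as pl_PL that separator is a no-break space (U+00A0), which shows up as a stray artifact in HTML. A short sketch of the behavior and the fix applied above (the exact separator character depends on the JDK's locale data):

```scala
import java.text.NumberFormat
import java.util.Locale

// With a Polish locale the grouped output typically contains U+00A0
// between digit groups, e.g. "1 234 567" with no-break spaces.
val grouped = NumberFormat.getInstance(Locale.forLanguageTag("pl-PL"))
println(grouped.format(1234567))

// The fix: disable grouping entirely, so the output is locale-independent.
val noGrouping = NumberFormat.getInstance(Locale.forLanguageTag("pl-PL"))
noGrouping.setGroupingUsed(false)
println(noGrouping.format(1234567))  // "1234567" in every locale
```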
spark git commit: [MINOR] Fix Java style errors and remove unused imports
Repository: spark
Updated Branches: refs/heads/branch-2.0 5301efc17 -> 934e2aa4f

[MINOR] Fix Java style errors and remove unused imports

Fix Java style errors and remove unused imports, which were found at random.

Tested on my local machine.

Author: Xin Ren

Closes #14161 from keypointt/SPARK-16437.

(cherry picked from commit f73891e0b9640e14455bdbfd999a8ff10b78a819)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/934e2aa4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/934e2aa4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/934e2aa4
Branch: refs/heads/branch-2.0
Commit: 934e2aa4f8aea409c8814f394f760f5952bd48f1
Parents: 5301efc
Author: Xin Ren
Authored: Wed Jul 13 10:47:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:48:39 2016 +0100
--
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala    | 3 +--
 .../test/scala/org/apache/spark/sql/sources/DataSourceTest.scala | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/934e2aa4/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 8cbdaeb..f1c78bb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -778,8 +778,7 @@ private[sql] object ParquetFileFormat extends Logging {
     val assumeBinaryIsString = sparkSession.sessionState.conf.isParquetBinaryAsString
     val assumeInt96IsTimestamp = sparkSession.sessionState.conf.isParquetINT96AsTimestamp
     val writeLegacyParquetFormat = sparkSession.sessionState.conf.writeLegacyParquetFormat
-    val serializedConf =
-      new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())
+    val serializedConf = new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())

     // !! HACK ALERT !!
     //

http://git-wip-us.apache.org/repos/asf/spark/blob/934e2aa4/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
index 206d03e..cc77d3c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.sources

 import org.apache.spark.sql._
-import org.apache.spark.sql.internal.SQLConf

 private[sql] abstract class DataSourceTest extends QueryTest {
spark git commit: [MINOR] Fix Java style errors and remove unused imports
Repository: spark
Updated Branches: refs/heads/master f156136da -> f73891e0b

[MINOR] Fix Java style errors and remove unused imports

## What changes were proposed in this pull request?

Fix Java style errors and remove unused imports, which were found at random. (For example, the `public static final` modifiers on the `MemoryAllocator` interface fields are implicit in Java and therefore redundant.)

## How was this patch tested?

Tested on my local machine.

Author: Xin Ren

Closes #14161 from keypointt/SPARK-16437.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f73891e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f73891e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f73891e0
Branch: refs/heads/master
Commit: f73891e0b9640e14455bdbfd999a8ff10b78a819
Parents: f156136
Author: Xin Ren
Authored: Wed Jul 13 10:47:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:47:07 2016 +0100
--
 .../org/apache/spark/unsafe/memory/HeapMemoryAllocator.java    | 1 -
 .../java/org/apache/spark/unsafe/memory/MemoryAllocator.java   | 6 +++---
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala  | 3 +--
 .../scala/org/apache/spark/sql/sources/DataSourceTest.scala    | 1 -
 4 files changed, 4 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f73891e0/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
--
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
index 3cd4264..3557482 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
@@ -24,7 +24,6 @@ import java.util.LinkedList;
 import java.util.Map;

 import org.apache.spark.unsafe.Platform;
-import org.apache.spark.unsafe.memory.MemoryAllocator;

 /**
  * A simple {@link MemoryAllocator} that can allocate up to 16GB using a JVM long primitive array.

http://git-wip-us.apache.org/repos/asf/spark/blob/f73891e0/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
--
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
index 8bd2b06..7b58868 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
@@ -23,12 +23,12 @@ public interface MemoryAllocator {
    * Whether to fill newly allocated and deallocated memory with 0xa5 and 0x5a bytes respectively.
    * This helps catch misuse of uninitialized or freed memory, but imposes some overhead.
    */
-  public static final boolean MEMORY_DEBUG_FILL_ENABLED = Boolean.parseBoolean(
+  boolean MEMORY_DEBUG_FILL_ENABLED = Boolean.parseBoolean(
     System.getProperty("spark.memory.debugFill", "false"));

   // Same as jemalloc's debug fill values.
-  public static final byte MEMORY_DEBUG_FILL_CLEAN_VALUE = (byte)0xa5;
-  public static final byte MEMORY_DEBUG_FILL_FREED_VALUE = (byte)0x5a;
+  byte MEMORY_DEBUG_FILL_CLEAN_VALUE = (byte)0xa5;
+  byte MEMORY_DEBUG_FILL_FREED_VALUE = (byte)0x5a;

   /**
    * Allocates a contiguous block of memory. Note that the allocated memory is not guaranteed

http://git-wip-us.apache.org/repos/asf/spark/blob/f73891e0/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 76d7f5c..772e031 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -780,8 +780,7 @@ private[sql] object ParquetFileFormat extends Logging {
     val assumeBinaryIsString = sparkSession.sessionState.conf.isParquetBinaryAsString
     val assumeInt96IsTimestamp = sparkSession.sessionState.conf.isParquetINT96AsTimestamp
     val writeLegacyParquetFormat = sparkSession.sessionState.conf.writeLegacyParquetFormat
-    val serializedConf =
-      new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())
+    val serializedConf = new SerializableConfiguration(sparkSession.sessionState.newHadoopConf())

     // !!
spark git commit: [SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks
Repository: spark
Updated Branches: refs/heads/branch-2.0 4b93a833b -> 5301efc17

[SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks

## What changes were proposed in this pull request?

I fixed a misassigned variable: numSkippedTasks was being assigned job.numCompletedTasks in the convertJobData method.

## How was this patch tested?

dev/run-tests

Author: Alex Bozarth

Closes #14141 from ajbozarth/spark16375.

(cherry picked from commit f156136dae5df38f73a25cf3fb48f98f417ef059)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5301efc1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5301efc1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5301efc1
Branch: refs/heads/branch-2.0
Commit: 5301efc1779c9d4a14ea1238bbbfa4aab25d3163
Parents: 4b93a83
Author: Alex Bozarth
Authored: Wed Jul 13 10:45:06 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:45:14 2016 +0100
--
 .../scala/org/apache/spark/status/api/v1/AllJobsResource.scala | 2 +-
 .../job_list_from_multi_attempt_app_json_1__expectation.json   | 2 +-
 .../job_list_from_multi_attempt_app_json_2__expectation.json   | 2 +-
 .../HistoryServerExpectations/job_list_json_expectation.json   | 6 +++---
 .../HistoryServerExpectations/one_job_json_expectation.json    | 2 +-
 .../succeeded_failed_job_list_json_expectation.json            | 6 +++---
 .../succeeded_job_list_json_expectation.json                   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
index b21d36d..d0d9ef1 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
@@ -91,7 +91,7 @@ private[v1] object AllJobsResource {
       numTasks = job.numTasks,
       numActiveTasks = job.numActiveTasks,
       numCompletedTasks = job.numCompletedTasks,
-      numSkippedTasks = job.numCompletedTasks,
+      numSkippedTasks = job.numSkippedTasks,
       numFailedTasks = job.numFailedTasks,
       numActiveStages = job.numActiveStages,
       numCompletedStages = job.completedStageIndices.size,

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/5301efc1/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
--
diff --git
spark git commit: [SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks
Repository: spark
Updated Branches: refs/heads/master c190d89bd -> f156136da

[SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks

## What changes were proposed in this pull request?

I fixed a misassigned variable: numSkippedTasks was being assigned job.numCompletedTasks in the convertJobData method.

## How was this patch tested?

dev/run-tests

Author: Alex Bozarth

Closes #14141 from ajbozarth/spark16375.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f156136d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f156136d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f156136d
Branch: refs/heads/master
Commit: f156136dae5df38f73a25cf3fb48f98f417ef059
Parents: c190d89
Author: Alex Bozarth
Authored: Wed Jul 13 10:45:06 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:45:06 2016 +0100
--
 .../scala/org/apache/spark/status/api/v1/AllJobsResource.scala | 2 +-
 .../job_list_from_multi_attempt_app_json_1__expectation.json   | 2 +-
 .../job_list_from_multi_attempt_app_json_2__expectation.json   | 2 +-
 .../HistoryServerExpectations/job_list_json_expectation.json   | 6 +++---
 .../HistoryServerExpectations/one_job_json_expectation.json    | 2 +-
 .../succeeded_failed_job_list_json_expectation.json            | 6 +++---
 .../succeeded_job_list_json_expectation.json                   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
index b21d36d..d0d9ef1 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
@@ -91,7 +91,7 @@ private[v1] object AllJobsResource {
       numTasks = job.numTasks,
       numActiveTasks = job.numActiveTasks,
       numCompletedTasks = job.numCompletedTasks,
-      numSkippedTasks = job.numCompletedTasks,
+      numSkippedTasks = job.numSkippedTasks,
       numFailedTasks = job.numFailedTasks,
       numActiveStages = job.numActiveStages,
       numCompletedStages = job.completedStageIndices.size,

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
index bb6bf43..c108fa6 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/f156136d/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
index
spark git commit: [SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks
Repository: spark
Updated Branches: refs/heads/branch-1.6 980db2bd4 -> fb0933681

[SPARK-16375][WEB UI] Fixed misassigned var: numCompletedTasks was assigned to numSkippedTasks

## What changes were proposed in this pull request?

I fixed a misassigned variable: numSkippedTasks was being assigned job.numCompletedTasks in the convertJobData method.

## How was this patch tested?

dev/run-tests

Author: Alex Bozarth

Closes #14141 from ajbozarth/spark16375.

(cherry picked from commit f156136dae5df38f73a25cf3fb48f98f417ef059)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb093368
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb093368
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb093368
Branch: refs/heads/branch-1.6
Commit: fb0933681db199af85543ccb5601b44a4af92321
Parents: 980db2b
Author: Alex Bozarth
Authored: Wed Jul 13 10:45:06 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:45:24 2016 +0100
--
 .../scala/org/apache/spark/status/api/v1/AllJobsResource.scala | 2 +-
 .../job_list_from_multi_attempt_app_json_1__expectation.json   | 2 +-
 .../job_list_from_multi_attempt_app_json_2__expectation.json   | 2 +-
 .../HistoryServerExpectations/job_list_json_expectation.json   | 6 +++---
 .../HistoryServerExpectations/one_job_json_expectation.json    | 2 +-
 .../succeeded_failed_job_list_json_expectation.json            | 6 +++---
 .../succeeded_job_list_json_expectation.json                   | 4 ++--
 7 files changed, 12 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
index 5783df5..4ac7b3d 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala
@@ -86,7 +86,7 @@ private[v1] object AllJobsResource {
       numTasks = job.numTasks,
       numActiveTasks = job.numActiveTasks,
       numCompletedTasks = job.numCompletedTasks,
-      numSkippedTasks = job.numCompletedTasks,
+      numSkippedTasks = job.numSkippedTasks,
       numFailedTasks = job.numFailedTasks,
       numActiveStages = job.numActiveStages,
       numCompletedStages = job.completedStageIndices.size,

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
index 2e92e1f..1940a60 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
--
diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
index 2e92e1f..1940a60 100644
--- a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json
@@ -6,7 +6,7 @@
   "numTasks" : 8,
   "numActiveTasks" : 0,
   "numCompletedTasks" : 8,
-  "numSkippedTasks" : 8,
+  "numSkippedTasks" : 0,
   "numFailedTasks" : 0,
   "numActiveStages" : 0,
   "numCompletedStages" : 1,

http://git-wip-us.apache.org/repos/asf/spark/blob/fb093368/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json
--
diff --git
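For context, a small self-contained Scala sketch of why the misassignment slipped through review. The case classes here are hypothetical stand-ins for Spark's UI job data and the REST API's JobData: both fields share a type, so the wrong wiring still compiles.

case class JobUIData(numCompletedTasks: Int, numSkippedTasks: Int)
case class JobData(numCompletedTasks: Int, numSkippedTasks: Int)

object MisassignedVarDemo {
  def main(args: Array[String]): Unit = {
    val job = JobUIData(numCompletedTasks = 8, numSkippedTasks = 0)

    // Pre-patch wiring: type-checks fine, but silently reports 8 skipped tasks.
    val buggy = JobData(
      numCompletedTasks = job.numCompletedTasks,
      numSkippedTasks = job.numCompletedTasks)

    // Post-patch wiring: each REST API field maps to its UI counterpart.
    val fixed = JobData(
      numCompletedTasks = job.numCompletedTasks,
      numSkippedTasks = job.numSkippedTasks)

    println(s"buggy: $buggy")
    println(s"fixed: $fixed")
  }
}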
spark git commit: [SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest
Repository: spark
Updated Branches: refs/heads/master 772c213ec -> c190d89bd

[SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest

## What changes were proposed in this pull request?

The second form of AssertOnQuery now actually invokes the condition; this also avoids a build warning.

## How was this patch tested?

Jenkins; running StreamTest.

Author: Sean Owen

Closes #14133 from srowen/SPARK-15889.2.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c190d89b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c190d89b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c190d89b
Branch: refs/heads/master
Commit: c190d89bd3cf677400c49238498207b87da9ee78
Parents: 772c213
Author: Sean Owen
Authored: Wed Jul 13 10:44:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:44:07 2016 +0100
--
 .../src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c190d89b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index f949652..af2b581 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -189,7 +189,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts {
   }

   def apply(message: String)(condition: StreamExecution => Unit): AssertOnQuery = {
-    new AssertOnQuery(s => { condition; true }, message)
+    new AssertOnQuery(s => { condition(s); true }, message)
   }
 }
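A minimal standalone Scala sketch of the gotcha being fixed (names hypothetical): naming a function value in statement position is a no-op, so the pre-patch predicate returned true without ever running the condition.

object AssertOnQueryDemo {
  def main(args: Array[String]): Unit = {
    var ran = false
    val condition: String => Unit = _ => ran = true

    // Pre-patch shape: `condition` is merely referenced, never applied, so the
    // predicate is vacuously true (and the compiler emits a warning about it).
    val buggy: String => Boolean = s => { condition; true }
    buggy("stream")
    println(s"ran after buggy form: $ran") // false

    // Post-patch shape: the condition actually runs against the input.
    val fixed: String => Boolean = s => { condition(s); true }
    fixed("stream")
    println(s"ran after fixed form: $ran") // true
  }
}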
spark git commit: [SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest
Repository: spark
Updated Branches: refs/heads/branch-2.0 5173f847c -> 4b93a833b

[SPARK-15889][STREAMING] Follow-up fix to erroneous condition in StreamTest

## What changes were proposed in this pull request?

The second form of AssertOnQuery now actually invokes the condition; this also avoids a build warning.

## How was this patch tested?

Jenkins; running StreamTest.

Author: Sean Owen

Closes #14133 from srowen/SPARK-15889.2.

(cherry picked from commit c190d89bd3cf677400c49238498207b87da9ee78)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b93a833
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b93a833
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b93a833
Branch: refs/heads/branch-2.0
Commit: 4b93a833b75d72043fd7770250c25247e690666d
Parents: 5173f84
Author: Sean Owen
Authored: Wed Jul 13 10:44:07 2016 +0100
Committer: Sean Owen
Committed: Wed Jul 13 10:44:15 2016 +0100
--
 .../src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4b93a833/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index f949652..af2b581 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -189,7 +189,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts {
   }

   def apply(message: String)(condition: StreamExecution => Unit): AssertOnQuery = {
-    new AssertOnQuery(s => { condition; true }, message)
+    new AssertOnQuery(s => { condition(s); true }, message)
   }
 }
spark git commit: [SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples
Repository: spark
Updated Branches: refs/heads/branch-2.0 41df62c59 -> 5173f847c

[SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples

- Hard-coded Spark SQL sample snippets were moved into source files under the examples sub-project.
- Inconsistencies between the Scala and Java Spark SQL examples were removed.
- The Scala and Java Spark SQL examples were updated.

The work is still in progress. All involved examples were tested manually. An additional round of testing will be done after the code review.

![image](https://cloud.githubusercontent.com/assets/6235869/16710314/51851606-462a-11e6-9fbe-0818daef65e4.png)

Author: aokolnychyi

Closes #14119 from aokolnychyi/spark_16303.

(cherry picked from commit 772c213ec702c80d0f25aa6f30b2dffebfbe2d0d)
Signed-off-by: Cheng Lian

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5173f847
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5173f847
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5173f847
Branch: refs/heads/branch-2.0
Commit: 5173f847c55a7b810d1c494c8b23c740ba110c39
Parents: 41df62c
Author: aokolnychyi
Authored: Wed Jul 13 16:12:05 2016 +0800
Committer: Cheng Lian
Committed: Wed Jul 13 16:12:51 2016 +0800
--
 docs/sql-programming-guide.md                   | 572 ++-
 .../apache/spark/examples/sql/JavaSparkSQL.java | 186 --
 .../spark/examples/sql/JavaSparkSqlExample.java | 336 +++
 .../examples/sql/JavaSqlDataSourceExample.java  | 217 +++
 .../examples/sql/hive/JavaSparkHiveExample.java | 131 +
 .../spark/examples/sql/SparkSqlExample.scala    | 254
 .../examples/sql/SqlDataSourceExample.scala     | 148 +
 .../spark/examples/sql/hive/HiveFromSpark.scala |  83 ---
 .../examples/sql/hive/SparkHiveExample.scala    | 107
 9 files changed, 1228 insertions(+), 806 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5173f847/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 448251c..f5d1fee 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -65,14 +65,14 @@ Throughout this document, we will often refer to Scala/Java Datasets of `Row`s a
 The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}
+{% include_example init_session scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

 The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSQL.java %}
+{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -105,14 +105,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession.
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Displays the content of the DataFrame to stdout
-df.show()
-{% endhighlight %}
-
+{% include_example create_df scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

@@ -121,14 +114,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight java %}
-SparkSession spark = ...; // An existing SparkSession.
-Dataset<Row> df = spark.read().json("examples/src/main/resources/people.json");
-
-// Displays the content of the DataFrame to stdout
-df.show();
-{% endhighlight %}
-
+{% include_example create_df java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -169,110 +155,20 @@ Here we include some basic examples of structured data processing using Datasets

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession
-
-// Create the DataFrame
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Show the content of the DataFrame
-df.show()
-// age  name
-// null Michael
-// 30   Andy
-// 19   Justin
-
-// Print the schema in a tree format
-df.printSchema()
-// root
-// |-- age: long (nullable = true)
-// |-- name: string (nullable = true)
-
-// Select only the "name" column
spark git commit: [SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples
Repository: spark
Updated Branches: refs/heads/master 1c58fa905 -> 772c213ec

[SPARK-16303][DOCS][EXAMPLES] Updated SQL programming guide and examples

- Hard-coded Spark SQL sample snippets were moved into source files under the examples sub-project.
- Inconsistencies between the Scala and Java Spark SQL examples were removed.
- The Scala and Java Spark SQL examples were updated.

The work is still in progress. All involved examples were tested manually. An additional round of testing will be done after the code review.

![image](https://cloud.githubusercontent.com/assets/6235869/16710314/51851606-462a-11e6-9fbe-0818daef65e4.png)

Author: aokolnychyi

Closes #14119 from aokolnychyi/spark_16303.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/772c213e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/772c213e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/772c213e
Branch: refs/heads/master
Commit: 772c213ec702c80d0f25aa6f30b2dffebfbe2d0d
Parents: 1c58fa9
Author: aokolnychyi
Authored: Wed Jul 13 16:12:05 2016 +0800
Committer: Cheng Lian
Committed: Wed Jul 13 16:12:11 2016 +0800
--
 docs/sql-programming-guide.md                   | 572 ++-
 .../apache/spark/examples/sql/JavaSparkSQL.java | 186 --
 .../spark/examples/sql/JavaSparkSqlExample.java | 336 +++
 .../examples/sql/JavaSqlDataSourceExample.java  | 217 +++
 .../examples/sql/hive/JavaSparkHiveExample.java | 131 +
 .../spark/examples/sql/SparkSqlExample.scala    | 254
 .../examples/sql/SqlDataSourceExample.scala     | 148 +
 .../spark/examples/sql/hive/HiveFromSpark.scala |  83 ---
 .../examples/sql/hive/SparkHiveExample.scala    | 107
 9 files changed, 1228 insertions(+), 806 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/772c213e/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index e838a13..2076b29 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -65,14 +65,14 @@ Throughout this document, we will often refer to Scala/Java Datasets of `Row`s a
 The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}
+{% include_example init_session scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

 The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:

-{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSQL.java %}
+{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -105,14 +105,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession.
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Displays the content of the DataFrame to stdout
-df.show()
-{% endhighlight %}
-
+{% include_example create_df scala/org/apache/spark/examples/sql/SparkSqlExample.scala %}

@@ -121,14 +114,7 @@ from a Hive table, or from [Spark data sources](#data-sources).

 As an example, the following creates a DataFrame based on the content of a JSON file:

-{% highlight java %}
-SparkSession spark = ...; // An existing SparkSession.
-Dataset<Row> df = spark.read().json("examples/src/main/resources/people.json");
-
-// Displays the content of the DataFrame to stdout
-df.show();
-{% endhighlight %}
-
+{% include_example create_df java/org/apache/spark/examples/sql/JavaSparkSqlExample.java %}

@@ -169,110 +155,20 @@ Here we include some basic examples of structured data processing using Datasets

-{% highlight scala %}
-val spark: SparkSession // An existing SparkSession
-
-// Create the DataFrame
-val df = spark.read.json("examples/src/main/resources/people.json")
-
-// Show the content of the DataFrame
-df.show()
-// age  name
-// null Michael
-// 30   Andy
-// 19   Justin
-
-// Print the schema in a tree format
-df.printSchema()
-// root
-// |-- age: long (nullable = true)
-// |-- name: string (nullable = true)
-
-// Select only the "name" column
-df.select("name").show()
-// name
-// Michael
-// Andy
-// Justin
-
-// Select everybody, but increment the age by 1
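For reference, a sketch of how the moved snippets are marked up in the new example sources so the guide can pull them in with {% include_example %}. The $example on/off$ delimiter comments reflect my understanding of Spark's include_example Jekyll plugin convention and should be checked against the repository:

import org.apache.spark.sql.SparkSession

object SparkSqlExampleSketch {
  def main(args: Array[String]): Unit = {
    // $example on:init_session$
    val spark = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .getOrCreate()
    // $example off:init_session$

    // $example on:create_df$
    val df = spark.read.json("examples/src/main/resources/people.json")
    // Displays the content of the DataFrame to stdout
    df.show()
    // $example off:create_df$

    spark.stop()
  }
}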
spark git commit: [HOTFIX] Fix build break.
Repository: spark
Updated Branches: refs/heads/branch-1.6 7c8a399a2 -> 980db2bd4

[HOTFIX] Fix build break.

(The SPARK-16489 backport brought ExpressionEvalHelperSuite over with the 2.x codegen API; this hotfix rewrites it against branch-1.6's older CodeGenContext / GeneratedExpressionCode API so the branch compiles again.)

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/980db2bd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/980db2bd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/980db2bd
Branch: refs/heads/branch-1.6
Commit: 980db2bd491398ac4d6db3a4550f1a377b6bf577
Parents: 7c8a399
Author: Reynold Xin
Authored: Tue Jul 12 23:40:37 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:40:37 2016 -0700
--
 .../expressions/ExpressionEvalHelperSuite.scala | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/980db2bd/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
index 64b65e2..a176fd8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
 import org.apache.spark.sql.types.{DataType, IntegerType}

 /**
@@ -43,12 +43,13 @@ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper
 case class BadCodegenExpression() extends LeafExpression {
   override def nullable: Boolean = false
   override def eval(input: InternalRow): Any = 10
-  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    ev.copy(code =
-      s"""
-        |int some_variable = 11;
-        |int ${ev.value} = 10;
-      """.stripMargin)
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    s"""
+      |int some_variable = 11;
+      |int ${ev.value} = 10;
+    """.stripMargin
   }
+
   override def dataType: DataType = IntegerType
 }
spark git commit: [SPARK-16489][SQL] Guard against variable reuse mistakes in expression code generation
Repository: spark
Updated Branches: refs/heads/branch-1.6 d1c992fea -> 7c8a399a2

[SPARK-16489][SQL] Guard against variable reuse mistakes in expression code generation

In code generation, it is incorrect to reuse variable names across different instances of the same expression. As an example, SPARK-16488 reports a bug in which the pmod expression reuses the variable name "r".

This patch updates the ExpressionEvalHelper test harness to always project two instances of the same expression, which will help us catch variable-reuse problems in expression unit tests. This patch also fixes the bug in the crc32 expression.

This is a test harness change, but I also created a new test suite for testing the test harness.

Author: Reynold Xin

Closes #14146 from rxin/SPARK-16489.

(cherry picked from commit c377e49e38a290e5c4fbc178278069788674dfb7)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c8a399a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c8a399a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c8a399a
Branch: refs/heads/branch-1.6
Commit: 7c8a399a292de113ebec4235ebe21c9a8fb85256
Parents: d1c992f
Author: Reynold Xin
Authored: Tue Jul 12 10:07:23 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:14:17 2016 -0700
--
 .../spark/sql/catalyst/expressions/misc.scala   |  7 +--
 .../expressions/ExpressionEvalHelper.scala      | 15 --
 .../expressions/ExpressionEvalHelperSuite.scala | 54
 3 files changed, 68 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7c8a399a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 0f6d02f..cf9403c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -151,11 +151,12 @@ case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInp

   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
     val CRC32 = "java.util.zip.CRC32"
+    val checksum = ctx.freshName("checksum")
     nullSafeCodeGen(ctx, ev, value => {
       s"""
-        $CRC32 checksum = new $CRC32();
-        checksum.update($value, 0, $value.length);
-        ${ev.value} = checksum.getValue();
+        $CRC32 $checksum = new $CRC32();
+        $checksum.update($value, 0, $value.length);
+        ${ev.value} = $checksum.getValue();
       """
     })
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c8a399a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
index 074785e..9f463c5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
@@ -160,9 +160,13 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {
       expression: Expression,
       expected: Any,
       inputRow: InternalRow = EmptyRow): Unit = {
-
+    // SPARK-16489 Explicitly doing code generation twice so code gen will fail if
+    // some expression is reusing variable names across different instances.
+    // This behavior is tested in ExpressionEvalHelperSuite.
     val plan = generateProject(
-      GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil),
+      GenerateUnsafeProjection.generate(
+        Alias(expression, s"Optimized($expression)1")() ::
+          Alias(expression, s"Optimized($expression)2")() :: Nil),
       expression)

     val unsafeRow = plan(inputRow)
@@ -170,13 +174,14 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {

     if (expected == null) {
       if (!unsafeRow.isNullAt(0)) {
-        val expectedRow = InternalRow(expected)
+        val expectedRow = InternalRow(expected, expected)
         fail("Incorrect evaluation in unsafe mode: " +
           s"$expression, actual: $unsafeRow, expected: $expectedRow$input")
       }
     } else {
-      val lit = InternalRow(expected)
-      val
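To make the failure mode concrete, a self-contained Scala sketch (all names hypothetical; freshName stands in for CodeGenContext.freshName) of why a fixed local name breaks once checkEvaluation projects the expression twice:

object FreshNameDemo {
  private var counter = 0
  // Stand-in for CodeGenContext.freshName: append a unique suffix per call.
  def freshName(base: String): String = { counter += 1; s"$base$counter" }

  // Buggy codegen template: every instance emits the same local name "r".
  def badGen(resultVar: String): String =
    s"""long r = computeChecksum();
       |long $resultVar = r;""".stripMargin

  // Fixed codegen template: each instance draws a fresh name.
  def goodGen(resultVar: String): String = {
    val r = freshName("r")
    s"""long $r = computeChecksum();
       |long $resultVar = $r;""".stripMargin
  }

  def main(args: Array[String]): Unit = {
    // Projecting the same expression twice, as the hardened harness now does:
    println(Seq(badGen("value1"), badGen("value2")).mkString("\n"))
    // -> declares `long r` twice in one method; Janino rejects the generated
    //    Java with "Redefinition of local variable r"
    println(Seq(goodGen("value1"), goodGen("value2")).mkString("\n"))
    // -> each instance gets r1, r2, ... and the generated code compiles
  }
}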
spark git commit: [SPARK-16488] Fix codegen variable namespace collision in pmod and partitionBy
Repository: spark
Updated Branches: refs/heads/branch-1.6 9808735e0 -> d1c992fea

[SPARK-16488] Fix codegen variable namespace collision in pmod and partitionBy

This patch fixes a variable namespace collision bug in pmod and partitionBy.

Tested with a regression test for one possible occurrence. A more general fix in `ExpressionEvalHelper.checkEvaluation` will be in a subsequent PR.

Author: Sameer Agarwal

Closes #14144 from sameeragarwal/codegen-bug.

(cherry picked from commit 9cc74f95edb6e4f56151966139cd0dc24e377949)
Signed-off-by: Reynold Xin
(cherry picked from commit 689261465ad1dd443ebf764ad837243418b986ef)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d1c992fe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d1c992fe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d1c992fe
Branch: refs/heads/branch-1.6
Commit: d1c992fea3e5999a3494c398f5040d6102f30aff
Parents: 9808735
Author: Sameer Agarwal
Authored: Mon Jul 11 20:26:01 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:13:19 2016 -0700
--
 .../sql/catalyst/expressions/arithmetic.scala | 25 ++--
 1 file changed, 13 insertions(+), 12 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d1c992fe/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 61a17fd..cfae285 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -445,34 +445,35 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic {

   override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
     nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+      val remainder = ctx.freshName("remainder")
       dataType match {
         case dt: DecimalType =>
           val decimalAdd = "$plus"
           s"""
-            ${ctx.javaType(dataType)} r = $eval1.remainder($eval2);
-            if (r.compare(new org.apache.spark.sql.types.Decimal().set(0)) < 0) {
-              ${ev.value} = (r.$decimalAdd($eval2)).remainder($eval2);
+            ${ctx.javaType(dataType)} $remainder = $eval1.remainder($eval2);
+            if ($remainder.compare(new org.apache.spark.sql.types.Decimal().set(0)) < 0) {
+              ${ev.value} = ($remainder.$decimalAdd($eval2)).remainder($eval2);
             } else {
-              ${ev.value} = r;
+              ${ev.value} = $remainder;
             }
           """
         // byte and short are casted into int when add, minus, times or divide
         case ByteType | ShortType =>
           s"""
-            ${ctx.javaType(dataType)} r = (${ctx.javaType(dataType)})($eval1 % $eval2);
-            if (r < 0) {
-              ${ev.value} = (${ctx.javaType(dataType)})((r + $eval2) % $eval2);
+            ${ctx.javaType(dataType)} $remainder = (${ctx.javaType(dataType)})($eval1 % $eval2);
+            if ($remainder < 0) {
+              ${ev.value} = (${ctx.javaType(dataType)})(($remainder + $eval2) % $eval2);
             } else {
-              ${ev.value} = r;
+              ${ev.value} = $remainder;
             }
           """
         case _ =>
           s"""
-            ${ctx.javaType(dataType)} r = $eval1 % $eval2;
-            if (r < 0) {
-              ${ev.value} = (r + $eval2) % $eval2;
+            ${ctx.javaType(dataType)} $remainder = $eval1 % $eval2;
+            if ($remainder < 0) {
+              ${ev.value} = ($remainder + $eval2) % $eval2;
             } else {
-              ${ev.value} = r;
+              ${ev.value} = $remainder;
             }
           """
       }
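The arithmetic the generated templates implement is an ordinary positive modulus. A minimal Scala sketch mirroring the default case of the patched template, for reference:

object PmodDemo {
  // pmod: a remainder that is always non-negative when the divisor is positive,
  // unlike the % operator.
  def pmod(a: Long, n: Long): Long = {
    val r = a % n
    if (r < 0) (r + n) % n else r
  }

  def main(args: Array[String]): Unit = {
    println(-7L % 3L)      // -1
    println(pmod(-7L, 3L)) // 2
    println(pmod(7L, 3L))  // 1
  }
}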
spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Repository: spark
Updated Branches: refs/heads/branch-1.6 702178d1f -> 9808735e0

[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegExpExtract and RegExpReplace currently crash on non-nullable input due to the use of a hard-coded local variable name (e.g. compilation fails with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m"`). This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9808735e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9808735e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9808735e
Branch: refs/heads/branch-1.6
Commit: 9808735e0ce91c68df4c1ce82c44543995d44aed
Parents: 702178d
Author: Eric Liang
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:09:31 2016 -0700
--
 .../expressions/regexpExpressions.scala      | 48 ++--
 .../expressions/StringExpressionsSuite.scala |  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/9808735e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 9e484c5..154c7a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -104,10 +104,11 @@ case class Like(left: Expression, right: Expression)
       """
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr));
           ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
         """
       })
@@ -152,10 +153,11 @@ case class RLike(left: Expression, right: Expression)
       """
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($rightStr);
           ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
         """
       })
@@ -248,6 +250,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     val classNamePattern = classOf[Pattern].getCanonicalName
     val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName

+    val matcher = ctx.freshName("matcher")
+
     ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
     ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
     ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
@@ -256,6 +260,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     ctx.addMutableState(classNameStringBuffer,
       termResult, s"${termResult} = new $classNameStringBuffer();")

+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
     nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
       s"""
       if (!$regexp.equals(${termLastRegex})) {
@@ -269,14 +279,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
         ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
       }
       ${termResult}.delete(0, ${termResult}.length());
-      java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+      java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString());

-      while (m.find()) {
-        m.appendReplacement(${termResult},
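A hedged reproduction sketch against the Spark 2.x API (illustrative only; branch-1.6 entry points differ, and whether this exact query trips the bug depends on the planned projection): two regexp_replace calls over non-nullable input in one projection, the shape that made the pre-patch generated Java declare the local "m" twice.

import org.apache.spark.sql.SparkSession

object RegexCodegenRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("regex-codegen-repro")
      .master("local[1]")
      .getOrCreate()

    // range(1).id is a non-nullable bigint; casting keeps it non-nullable,
    // the case the new setEvNotNull guard handles.
    val df = spark.range(1).selectExpr(
      "regexp_replace(cast(id AS string), '0', 'x') AS r1",
      "regexp_replace(cast(id AS string), '1', 'y') AS r2")

    // Pre-patch this could fail to compile the generated code with
    // "Redefinition of local variable m"; post-patch it runs.
    df.show()
    spark.stop()
  }
}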
spark git commit: [SPARK-16514][SQL] Fix various regex codegen bugs
Repository: spark
Updated Branches: refs/heads/branch-2.0 4303d292b -> 41df62c59

[SPARK-16514][SQL] Fix various regex codegen bugs

## What changes were proposed in this pull request?

RegExpExtract and RegExpReplace currently crash on non-nullable input due to the use of a hard-coded local variable name (e.g. compilation fails with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m"`). This changes those variables to use fresh names, and also in a few other places.

## How was this patch tested?

Unit tests. rxin

Author: Eric Liang

Closes #14168 from ericl/sc-3906.

(cherry picked from commit 1c58fa905b6543d366d00b2e5394dfd633987f6d)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41df62c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41df62c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41df62c5
Branch: refs/heads/branch-2.0
Commit: 41df62c595474d7afda6dbe76a558d8cb3be7ff2
Parents: 4303d29
Author: Eric Liang
Authored: Tue Jul 12 23:09:02 2016 -0700
Committer: Reynold Xin
Committed: Tue Jul 12 23:09:08 2016 -0700
--
 .../expressions/regexpExpressions.scala      | 48 ++--
 .../expressions/StringExpressionsSuite.scala |  6 +++
 2 files changed, 39 insertions(+), 15 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/41df62c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 541b860..be82b3b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -108,10 +108,11 @@ case class Like(left: Expression, right: Expression)
       """)
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
       nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr));
           ${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
         """
       })
@@ -157,10 +158,11 @@ case class RLike(left: Expression, right: Expression)
       """)
       }
     } else {
+      val rightStr = ctx.freshName("rightStr")
      nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
         s"""
-          String rightStr = ${eval2}.toString();
-          ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+          String $rightStr = ${eval2}.toString();
+          ${patternClass} $pattern = ${patternClass}.compile($rightStr);
           ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
         """
       })
@@ -259,6 +261,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     val classNamePattern = classOf[Pattern].getCanonicalName
     val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName

+    val matcher = ctx.freshName("matcher")
+
     ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
     ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
     ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
@@ -267,6 +271,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
     ctx.addMutableState(classNameStringBuffer,
       termResult, s"${termResult} = new $classNameStringBuffer();")

+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
     nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
       s"""
       if (!$regexp.equals(${termLastRegex})) {
@@ -280,14 +290,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression)
         ${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
       }
       ${termResult}.delete(0, ${termResult}.length());
-      java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+      java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString());

-      while (m.find()) {
-        m.appendReplacement(${termResult},