spark git commit: [SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement

2016-06-19 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d11f533de -> 19397caab


[SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and 
improvement

## What changes were proposed in this pull request?
This PR is the follow-up PR for 
https://github.com/apache/spark/pull/13754/files and 
https://github.com/apache/spark/pull/13749. I will comment inline to explain my 
changes.
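
To make the user-facing side of the DataFrameWriter change below concrete, here is a minimal
sketch of the behavior this code path enforces (the table name `partitioned_t` and its
partition column `p` are made up for illustration):

```scala
// Sketch only: assumes a pre-existing table `partitioned_t` that is already
// partitioned by a column `p`.
val df = spark.range(10).selectExpr("id", "id % 2 AS p")

// Fine: the partitioning declared on the table itself is used for the insert.
df.write.mode("overwrite").insertInto("partitioned_t")

// AnalysisException: insertInto() can't be used together with partitionBy(),
// because the partition columns are already defined by the target table.
df.write.partitionBy("p").insertInto("partitioned_t")
```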

## How was this patch tested?
Existing tests.

Author: Yin Huai 

Closes #13766 from yhuai/caseSensitivity.

(cherry picked from commit 6d0f921aedfdd3b7e8472b6776d0c7d8299190bd)
Signed-off-by: Yin Huai 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19397caa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19397caa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19397caa

Branch: refs/heads/branch-2.0
Commit: 19397caab62b550269961a123bd11a34afc3a09b
Parents: d11f533
Author: Yin Huai 
Authored: Sun Jun 19 21:45:53 2016 -0700
Committer: Yin Huai 
Committed: Sun Jun 19 21:46:14 2016 -0700

--
 .../plans/logical/basicLogicalOperators.scala   |  2 ++
 .../org/apache/spark/sql/DataFrameWriter.scala  | 24 --
 .../sql/execution/datasources/DataSource.scala  |  9 ++---
 .../spark/sql/execution/datasources/rules.scala | 13 ++--
 .../spark/sql/internal/SessionState.scala   |  2 +-
 .../spark/sql/execution/command/DDLSuite.scala  | 20 +--
 .../spark/sql/hive/HiveSessionState.scala   |  2 +-
 .../sql/hive/InsertIntoHiveTableSuite.scala | 20 +++
 .../sql/hive/execution/HiveQuerySuite.scala | 35 
 .../sql/hive/execution/SQLQuerySuite.scala  | 32 --
 10 files changed, 98 insertions(+), 61 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/19397caa/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 69b8b05..ff3dcbc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -369,6 +369,8 @@ case class InsertIntoTable(
     if (table.output.isEmpty) {
       None
     } else {
+      // Note: The parser (visitPartitionSpec in AstBuilder) already turns
+      // keys in partition to their lowercase forms.
       val staticPartCols = partition.filter(_._2.isDefined).keySet
       Some(table.output.filterNot(a => staticPartCols.contains(a.name)))
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/19397caa/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index e6fc974..ca3972d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -245,29 +245,17 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     if (partitioningColumns.isDefined) {
       throw new AnalysisException(
         "insertInto() can't be used together with partitionBy(). " +
-          "Partition columns are defined by the table into which is being inserted."
+          "Partition columns have already be defined for the table. " +
+          "It is not necessary to use partitionBy()."
       )
     }
 
-    val partitions = normalizedParCols.map(_.map(col => col -> Option.empty[String]).toMap)
-    val overwrite = mode == SaveMode.Overwrite
-
-    // A partitioned relation's schema can be different from the input logicalPlan, since
-    // partition columns are all moved after data columns. We Project to adjust the ordering.
-    // TODO: this belongs to the analyzer.
-    val input = normalizedParCols.map { parCols =>
-      val (inputPartCols, inputDataCols) = df.logicalPlan.output.partition { attr =>
-        parCols.contains(attr.name)
-      }
-      Project(inputDataCols ++ inputPartCols, df.logicalPlan)
-    }.getOrElse(df.logicalPlan)
-
     df.sparkSession.sessionState.executePlan(
       InsertIntoTable(
-        UnresolvedRelation(tableIdent),
-        partitions.getOrElse(Map.empty[String, Option[String]]),
-        input,
-        overwrite,
+        table = 

spark git commit: [SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement

2016-06-19 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master 4f17fddcd -> 6d0f921ae


[SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and 
improvement

## What changes were proposed in this pull request?
This PR is the follow-up PR for 
https://github.com/apache/spark/pull/13754/files and 
https://github.com/apache/spark/pull/13749. I will comment inline to explain my 
changes.

## How was this patch tested?
Existing tests.

Author: Yin Huai 

Closes #13766 from yhuai/caseSensitivity.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6d0f921a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6d0f921a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6d0f921a

Branch: refs/heads/master
Commit: 6d0f921aedfdd3b7e8472b6776d0c7d8299190bd
Parents: 4f17fdd
Author: Yin Huai 
Authored: Sun Jun 19 21:45:53 2016 -0700
Committer: Yin Huai 
Committed: Sun Jun 19 21:45:53 2016 -0700

--
 .../plans/logical/basicLogicalOperators.scala   |  2 ++
 .../org/apache/spark/sql/DataFrameWriter.scala  | 24 --
 .../sql/execution/datasources/DataSource.scala  |  9 ++---
 .../spark/sql/execution/datasources/rules.scala | 13 ++--
 .../spark/sql/internal/SessionState.scala   |  2 +-
 .../spark/sql/execution/command/DDLSuite.scala  | 20 +--
 .../spark/sql/hive/HiveSessionState.scala   |  2 +-
 .../sql/hive/InsertIntoHiveTableSuite.scala | 20 +++
 .../sql/hive/execution/HiveQuerySuite.scala | 35 
 .../sql/hive/execution/SQLQuerySuite.scala  | 32 --
 10 files changed, 98 insertions(+), 61 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6d0f921a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 69b8b05..ff3dcbc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -369,6 +369,8 @@ case class InsertIntoTable(
     if (table.output.isEmpty) {
       None
     } else {
+      // Note: The parser (visitPartitionSpec in AstBuilder) already turns
+      // keys in partition to their lowercase forms.
       val staticPartCols = partition.filter(_._2.isDefined).keySet
       Some(table.output.filterNot(a => staticPartCols.contains(a.name)))
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/6d0f921a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index e6fc974..ca3972d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -245,29 +245,17 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     if (partitioningColumns.isDefined) {
       throw new AnalysisException(
         "insertInto() can't be used together with partitionBy(). " +
-          "Partition columns are defined by the table into which is being inserted."
+          "Partition columns have already be defined for the table. " +
+          "It is not necessary to use partitionBy()."
      )
     }
 
-    val partitions = normalizedParCols.map(_.map(col => col -> Option.empty[String]).toMap)
-    val overwrite = mode == SaveMode.Overwrite
-
-    // A partitioned relation's schema can be different from the input logicalPlan, since
-    // partition columns are all moved after data columns. We Project to adjust the ordering.
-    // TODO: this belongs to the analyzer.
-    val input = normalizedParCols.map { parCols =>
-      val (inputPartCols, inputDataCols) = df.logicalPlan.output.partition { attr =>
-        parCols.contains(attr.name)
-      }
-      Project(inputDataCols ++ inputPartCols, df.logicalPlan)
-    }.getOrElse(df.logicalPlan)
-
     df.sparkSession.sessionState.executePlan(
       InsertIntoTable(
-        UnresolvedRelation(tableIdent),
-        partitions.getOrElse(Map.empty[String, Option[String]]),
-        input,
-        overwrite,
+        table = UnresolvedRelation(tableIdent),
+        partition = Map.empty[String, Option[String]],
+        child = df.logicalPlan,
+        overwrite = mode == 

spark git commit: [SPARK-16031] Add debug-only socket source in Structured Streaming

2016-06-19 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 5930d7a2e -> 4f17fddcd


[SPARK-16031] Add debug-only socket source in Structured Streaming

## What changes were proposed in this pull request?

This patch adds a text-based socket source similar to the one in Spark 
Streaming for debugging and tutorials. The source is clearly marked as 
debug-only so that users don't try to run it in production applications, 
because this type of source cannot provide HA without storing a lot of state in 
Spark.
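
A minimal usage sketch, assuming the source's short name is `socket` and a test server such as
`nc -lk 9999` is running locally (host and port values here are arbitrary):

```scala
// Debug-only: read lines from a TCP socket as an unbounded streaming DataFrame.
val lines = spark.readStream
  .format("socket")
  .option("host", "localhost")
  .option("port", 9999)
  .load()

// Echo each micro-batch to the console (also a debug-only sink) and block.
val query = lines.writeStream
  .format("console")
  .start()

query.awaitTermination()
```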

## How was this patch tested?

Unit tests and manual tests in spark-shell.

Author: Matei Zaharia 

Closes #13748 from mateiz/socket-source.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f17fddc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f17fddc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f17fddc

Branch: refs/heads/master
Commit: 4f17fddcd57adeae0d7e31bd14423283d4b625e9
Parents: 5930d7a
Author: Matei Zaharia 
Authored: Sun Jun 19 21:27:04 2016 -0700
Committer: Reynold Xin 
Committed: Sun Jun 19 21:27:04 2016 -0700

--
 apache.spark.sql.sources.DataSourceRegister |   1 +
 .../execution/streaming/FileStreamSource.scala  |   2 +
 .../spark/sql/execution/streaming/Source.scala  |   3 +
 .../execution/streaming/StreamExecution.scala   |   1 +
 .../spark/sql/execution/streaming/memory.scala  |   2 +
 .../spark/sql/execution/streaming/socket.scala  | 144 +++
 .../streaming/TextSocketStreamSuite.scala   | 136 ++
 .../spark/sql/streaming/StreamSuite.scala   |   2 +
 .../test/DataStreamReaderWriterSuite.scala  |   2 +
 9 files changed, 293 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
--
diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index 9f8bb5d..27d32b5 100644
--- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -4,3 +4,4 @@ org.apache.spark.sql.execution.datasources.json.JsonFileFormat
 org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 org.apache.spark.sql.execution.datasources.text.TextFileFormat
 org.apache.spark.sql.execution.streaming.ConsoleSinkProvider
+org.apache.spark.sql.execution.streaming.TextSocketSourceProvider

http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
index bef5616..9886ad0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
@@ -128,4 +128,6 @@ class FileStreamSource(
   override def getOffset: Option[Offset] = Some(fetchMaxOffset()).filterNot(_.offset == -1)
 
   override def toString: String = s"FileStreamSource[$qualifiedBasePath]"
+
+  override def stop() {}
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
index 14450c2..9711478 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
@@ -39,4 +39,7 @@ trait Source  {
    * same data for a particular `start` and `end` pair.
    */
   def getBatch(start: Option[Offset], end: Offset): DataFrame
+
+  /** Stop this source and free any resources it has allocated. */
+  def stop(): Unit
 }
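
With `stop()` added, the Source contract is schema/getOffset/getBatch/stop. The following toy
implementation is only a sketch (the class name and its single-batch behavior are invented
here), showing where resource cleanup now belongs:

```scala
import org.apache.spark.sql.{DataFrame, Encoders, SQLContext}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Serves a fixed sequence of strings as a single batch.
class FixedStringsSource(sqlContext: SQLContext, data: Seq[String]) extends Source {
  override def schema: StructType = StructType(StructField("value", StringType) :: Nil)

  // Exactly one batch is ever available.
  override def getOffset: Option[Offset] = Some(LongOffset(0))

  override def getBatch(start: Option[Offset], end: Offset): DataFrame =
    sqlContext.createDataset(data)(Encoders.STRING).toDF("value")

  // New in this patch: release sockets, threads, caches, etc. here.
  override def stop(): Unit = ()
}
```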

http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
 

spark git commit: [SPARK-16031] Add debug-only socket source in Structured Streaming

2016-06-19 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 80c6d4e3a -> d11f533de


[SPARK-16031] Add debug-only socket source in Structured Streaming

## What changes were proposed in this pull request?

This patch adds a text-based socket source similar to the one in Spark 
Streaming for debugging and tutorials. The source is clearly marked as 
debug-only so that users don't try to run it in production applications, 
because this type of source cannot provide HA without storing a lot of state in 
Spark.

## How was this patch tested?

Unit tests and manual tests in spark-shell.

Author: Matei Zaharia 

Closes #13748 from mateiz/socket-source.

(cherry picked from commit 4f17fddcd57adeae0d7e31bd14423283d4b625e9)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d11f533d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d11f533d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d11f533d

Branch: refs/heads/branch-2.0
Commit: d11f533ded502c6cc4a129e201362bca6e302028
Parents: 80c6d4e
Author: Matei Zaharia 
Authored: Sun Jun 19 21:27:04 2016 -0700
Committer: Reynold Xin 
Committed: Sun Jun 19 21:27:11 2016 -0700

--
 apache.spark.sql.sources.DataSourceRegister |   1 +
 .../execution/streaming/FileStreamSource.scala  |   2 +
 .../spark/sql/execution/streaming/Source.scala  |   3 +
 .../execution/streaming/StreamExecution.scala   |   1 +
 .../spark/sql/execution/streaming/memory.scala  |   2 +
 .../spark/sql/execution/streaming/socket.scala  | 144 +++
 .../streaming/TextSocketStreamSuite.scala   | 136 ++
 .../spark/sql/streaming/StreamSuite.scala   |   2 +
 .../test/DataStreamReaderWriterSuite.scala  |   2 +
 9 files changed, 293 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
--
diff --git 
a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
 
b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index 9f8bb5d..27d32b5 100644
--- 
a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ 
b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -4,3 +4,4 @@ org.apache.spark.sql.execution.datasources.json.JsonFileFormat
 org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 org.apache.spark.sql.execution.datasources.text.TextFileFormat
 org.apache.spark.sql.execution.streaming.ConsoleSinkProvider
+org.apache.spark.sql.execution.streaming.TextSocketSourceProvider

http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
index bef5616..9886ad0 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala
@@ -128,4 +128,6 @@ class FileStreamSource(
   override def getOffset: Option[Offset] = 
Some(fetchMaxOffset()).filterNot(_.offset == -1)
 
   override def toString: String = s"FileStreamSource[$qualifiedBasePath]"
+
+  override def stop() {}
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
index 14450c2..9711478 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
@@ -39,4 +39,7 @@ trait Source  {
* same data for a particular `start` and `end` pair.
*/
   def getBatch(start: Option[Offset], end: Offset): DataFrame
+
+  /** Stop this source and free any resources it has allocated. */
+  def stop(): Unit
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
--
diff 

[1/2] spark git commit: Preparing Spark release v1.6.2-rc2

2016-06-19 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 3d569d9ea -> 208348595


Preparing Spark release v1.6.2-rc2


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54b1121f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54b1121f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54b1121f

Branch: refs/heads/branch-1.6
Commit: 54b1121f351f056d6b67d2bb4efe0d553c0f7482
Parents: 3d569d9
Author: Patrick Wendell 
Authored: Sun Jun 19 14:06:21 2016 -0700
Committer: Patrick Wendell 
Committed: Sun Jun 19 14:06:21 2016 -0700

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 6ec2ca4..438e6ed 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 2d778c5..85be37f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index a8d7863..15e60a3 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index a06e59c..0bc749f 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 8e9e02e..f771a36 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 52c8a91..1ef7e7f 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/external/flume-sink/pom.xml

[spark] Git Push Summary

2016-06-19 Thread pwendell
Repository: spark
Updated Tags:  refs/tags/v1.6.2-rc2 [created] 54b1121f3




[2/2] spark git commit: Preparing development version 1.6.3-SNAPSHOT

2016-06-19 Thread pwendell
Preparing development version 1.6.3-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20834859
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20834859
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20834859

Branch: refs/heads/branch-1.6
Commit: 20834859517438124b94a90a27014cc932d1eeb6
Parents: 54b1121
Author: Patrick Wendell 
Authored: Sun Jun 19 14:06:28 2016 -0700
Committer: Patrick Wendell 
Committed: Sun Jun 19 14:06:28 2016 -0700

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 438e6ed..6ec2ca4 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 85be37f..2d778c5 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 15e60a3..a8d7863 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index 0bc749f..a06e59c 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index f771a36..8e9e02e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 1ef7e7f..52c8a91 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml 

spark git commit: [SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference

2016-06-19 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2c1c337ba -> 80c6d4e3a


[SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference

## What changes were proposed in this pull request?

In the 2.0 documentation, the line "A full example that produces the experiment
described in the PIC paper can be found under examples/." is redundant.

There is already "Find full example code at
"examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala"
in the Spark repo.".

We should remove the first line, which makes this page consistent with the other documents.

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, 
manual tests)

Manual test

Author: wm...@hotmail.com 

Closes #13755 from wangmiao1981/doc.

(cherry picked from commit 5930d7a2e95b2fe4d470cf39546e5a12306553fe)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80c6d4e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80c6d4e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80c6d4e3

Branch: refs/heads/branch-2.0
Commit: 80c6d4e3a49fad4dac46738fe5458641f21b96a1
Parents: 2c1c337
Author: wm...@hotmail.com 
Authored: Sun Jun 19 20:19:40 2016 +0100
Committer: Sean Owen 
Committed: Sun Jun 19 20:19:48 2016 +0100

--
 docs/mllib-clustering.md | 4 
 1 file changed, 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/80c6d4e3/docs/mllib-clustering.md
--
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index 6897ba4..073927c 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -170,10 +170,6 @@ which contains the computed clustering assignments.
 Refer to the [`PowerIterationClustering` Scala 
docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClustering)
 and [`PowerIterationClusteringModel` Scala 
docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClusteringModel)
 for details on the API.
 
 {% include_example 
scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala %}
-
-A full example that produces the experiment described in the PIC paper can be 
found under
-[`examples/`](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala).
-
 
 
 





spark git commit: [SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference

2016-06-19 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 1b3a9b966 -> 5930d7a2e


[SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference

## What changes were proposed in this pull request?

In the 2.0 documentation, the line "A full example that produces the experiment
described in the PIC paper can be found under examples/." is redundant.

There is already "Find full example code at
"examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala"
in the Spark repo.".

We should remove the first line, which makes this page consistent with the other documents.

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, 
manual tests)

Manual test

Author: wm...@hotmail.com 

Closes #13755 from wangmiao1981/doc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5930d7a2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5930d7a2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5930d7a2

Branch: refs/heads/master
Commit: 5930d7a2e95b2fe4d470cf39546e5a12306553fe
Parents: 1b3a9b9
Author: wm...@hotmail.com 
Authored: Sun Jun 19 20:19:40 2016 +0100
Committer: Sean Owen 
Committed: Sun Jun 19 20:19:40 2016 +0100

--
 docs/mllib-clustering.md | 4 
 1 file changed, 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5930d7a2/docs/mllib-clustering.md
--
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index 6897ba4..073927c 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -170,10 +170,6 @@ which contains the computed clustering assignments.
 Refer to the [`PowerIterationClustering` Scala 
docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClustering)
 and [`PowerIterationClusteringModel` Scala 
docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClusteringModel)
 for details on the API.
 
 {% include_example 
scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala %}
-
-A full example that produces the experiment described in the PIC paper can be 
found under
-[`examples/`](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala).
-
 
 
 





spark git commit: [SPARK-15942][REPL] Unblock `:reset` command in REPL.

2016-06-19 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 dc85bd0a0 -> 2c1c337ba


[SPARK-15942][REPL] Unblock `:reset` command in REPL.

## What changes were proposed in this pull request?
(Pasted from the JIRA issue.)
As a follow-up to SPARK-15697, the `:reset` command now has the following semantics.
On `:reset` we forget everything the user has done, but not the initialization of
Spark. To avoid confusion, we print a message saying that `spark` and `sc` are not
erased; in fact they remain in exactly the state that the user's previous operations
left them in.
Admittedly this is not what "reset" usually means, but an accidental shutdown of a
cluster can be very costly, so in that sense the behavior is less surprising and
still useful.
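
A rough sketch of the resulting spark-shell behavior (the console output shown here is
illustrative, not verbatim):

```
scala> val x = 1
x: Int = 1

scala> :reset
...
Note that after :reset, state of SparkSession and SparkContext is unchanged.

scala> x                                // user-defined bindings are gone
<console>:24: error: not found: value x

scala> sc.parallelize(1 to 3).count()   // but `sc` and `spark` still work
res1: Long = 3
```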

## How was this patch tested?

Manually, by calling `:reset` command, by both altering the state of 
SparkContext and creating some local variables.

Author: Prashant Sharma 
Author: Prashant Sharma 

Closes #13661 from ScrapCodes/repl-reset-command.

(cherry picked from commit 1b3a9b966a7813e2406dfb020e83605af22f9ef3)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c1c337b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c1c337b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c1c337b

Branch: refs/heads/branch-2.0
Commit: 2c1c337ba5984b9e495b4d02bf865e56fd83ab03
Parents: dc85bd0
Author: Prashant Sharma 
Authored: Sun Jun 19 20:12:00 2016 +0100
Committer: Sean Owen 
Committed: Sun Jun 19 20:12:08 2016 +0100

--
 .../scala/org/apache/spark/repl/SparkILoop.scala| 16 ++--
 .../scala/org/apache/spark/repl/ReplSuite.scala |  3 ++-
 2 files changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2c1c337b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
--
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index dcf3209..2707b08 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -36,7 +36,11 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
   def initializeSpark() {
     intp.beQuietDuring {
       processLine("""
-        @transient val spark = org.apache.spark.repl.Main.createSparkSession()
+        @transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) {
+            org.apache.spark.repl.Main.sparkSession
+          } else {
+            org.apache.spark.repl.Main.createSparkSession()
+          }
         @transient val sc = {
           val _sc = spark.sparkContext
           _sc.uiWebUrl.foreach(webUrl => println(s"Spark context Web UI available at ${webUrl}"))
@@ -50,6 +54,7 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
       processLine("import spark.implicits._")
       processLine("import spark.sql")
       processLine("import org.apache.spark.sql.functions._")
+      replayCommandStack = Nil // remove above commands from session history.
     }
   }
 
@@ -70,7 +75,8 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
     echo("Type :help for more information.")
   }
 
-  private val blockedCommands = Set[String]("reset")
+  /** Add repl commands that needs to be blocked. e.g. reset */
+  private val blockedCommands = Set[String]()
 
   /** Standard commands */
   lazy val sparkStandardCommands: List[SparkILoop.this.LoopCommand] =
@@ -88,6 +94,12 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
     initializeSpark()
     super.loadFiles(settings)
   }
+
+  override def resetCommand(line: String): Unit = {
+    super.resetCommand(line)
+    initializeSpark()
+    echo("Note that after :reset, state of SparkSession and SparkContext is unchanged.")
+  }
 }
 
 object SparkILoop {

http://git-wip-us.apache.org/repos/asf/spark/blob/2c1c337b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
--
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 2444e93..c10db94 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -49,7 +49,8 @@ class ReplSuite extends SparkFunSuite {
 
 val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH)
   

spark git commit: [SPARK-15942][REPL] Unblock `:reset` command in REPL.

2016-06-19 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 001a58960 -> 1b3a9b966


[SPARK-15942][REPL] Unblock `:reset` command in REPL.

## What changes were proposed in this pull request?
(Pasted from the JIRA issue.)
As a follow-up to SPARK-15697, the `:reset` command now has the following semantics.
On `:reset` we forget everything the user has done, but not the initialization of
Spark. To avoid confusion, we print a message saying that `spark` and `sc` are not
erased; in fact they remain in exactly the state that the user's previous operations
left them in.
Admittedly this is not what "reset" usually means, but an accidental shutdown of a
cluster can be very costly, so in that sense the behavior is less surprising and
still useful.

## How was this patch tested?

Manually, by calling `:reset` command, by both altering the state of 
SparkContext and creating some local variables.

Author: Prashant Sharma 
Author: Prashant Sharma 

Closes #13661 from ScrapCodes/repl-reset-command.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1b3a9b96
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1b3a9b96
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1b3a9b96

Branch: refs/heads/master
Commit: 1b3a9b966a7813e2406dfb020e83605af22f9ef3
Parents: 001a589
Author: Prashant Sharma 
Authored: Sun Jun 19 20:12:00 2016 +0100
Committer: Sean Owen 
Committed: Sun Jun 19 20:12:00 2016 +0100

--
 .../scala/org/apache/spark/repl/SparkILoop.scala| 16 ++--
 .../scala/org/apache/spark/repl/ReplSuite.scala |  3 ++-
 2 files changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1b3a9b96/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
--
diff --git 
a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala 
b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index dcf3209..2707b08 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -36,7 +36,11 @@ class SparkILoop(in0: Option[BufferedReader], out: 
JPrintWriter)
   def initializeSpark() {
 intp.beQuietDuring {
   processLine("""
-@transient val spark = org.apache.spark.repl.Main.createSparkSession()
+@transient val spark = if (org.apache.spark.repl.Main.sparkSession != 
null) {
+org.apache.spark.repl.Main.sparkSession
+  } else {
+org.apache.spark.repl.Main.createSparkSession()
+  }
 @transient val sc = {
   val _sc = spark.sparkContext
   _sc.uiWebUrl.foreach(webUrl => println(s"Spark context Web UI 
available at ${webUrl}"))
@@ -50,6 +54,7 @@ class SparkILoop(in0: Option[BufferedReader], out: 
JPrintWriter)
   processLine("import spark.implicits._")
   processLine("import spark.sql")
   processLine("import org.apache.spark.sql.functions._")
+  replayCommandStack = Nil // remove above commands from session history.
 }
   }
 
@@ -70,7 +75,8 @@ class SparkILoop(in0: Option[BufferedReader], out: 
JPrintWriter)
 echo("Type :help for more information.")
   }
 
-  private val blockedCommands = Set[String]("reset")
+  /** Add repl commands that needs to be blocked. e.g. reset */
+  private val blockedCommands = Set[String]()
 
   /** Standard commands */
   lazy val sparkStandardCommands: List[SparkILoop.this.LoopCommand] =
@@ -88,6 +94,12 @@ class SparkILoop(in0: Option[BufferedReader], out: 
JPrintWriter)
 initializeSpark()
 super.loadFiles(settings)
   }
+
+  override def resetCommand(line: String): Unit = {
+super.resetCommand(line)
+initializeSpark()
+echo("Note that after :reset, state of SparkSession and SparkContext is 
unchanged.")
+  }
 }
 
 object SparkILoop {

http://git-wip-us.apache.org/repos/asf/spark/blob/1b3a9b96/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
--
diff --git 
a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala 
b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 2444e93..c10db94 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -49,7 +49,8 @@ class ReplSuite extends SparkFunSuite {
 
 val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH)
 System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath)
-
+Main.sparkContext = null
+Main.sparkSession = null // 

spark git commit: Revert "[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time"

2016-06-19 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 41efd2091 -> 3d569d9ea


Revert "[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to 
Daylight Saving Time"

This reverts commit 41efd2091781b31118c6d37be59e4f0f4ae2bf66.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d569d9e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d569d9e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d569d9e

Branch: refs/heads/branch-1.6
Commit: 3d569d9ea9357d6161b0c75ce2e6f045c3447458
Parents: 41efd20
Author: Davies Liu 
Authored: Sun Jun 19 09:30:59 2016 -0700
Committer: Davies Liu 
Committed: Sun Jun 19 09:30:59 2016 -0700

--
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++--
 .../org/apache/spark/sql/types/DateType.scala   |  2 +-
 .../sql/catalyst/util/DateTimeTestUtils.scala   | 40 ---
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 40 ---
 4 files changed, 4 insertions(+), 129 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3d569d9e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 94692d8..2b93882 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -89,8 +89,8 @@ object DateTimeUtils {
 
   // reverse of millisToDays
   def daysToMillis(days: SQLDate): Long = {
-val millisLocal = days.toLong * MILLIS_PER_DAY
-millisLocal - getOffsetFromLocalMillis(millisLocal, 
threadLocalLocalTimeZone.get())
+val millisUtc = days.toLong * MILLIS_PER_DAY
+millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc)
   }
 
   def dateToString(days: SQLDate): String =
@@ -820,41 +820,6 @@ object DateTimeUtils {
   }
 
   /**
-   * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in 
given timezone.
-   */
-  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long 
= {
-var guess = tz.getRawOffset
-// the actual offset should be calculated based on milliseconds in UTC
-val offset = tz.getOffset(millisLocal - guess)
-if (offset != guess) {
-  guess = tz.getOffset(millisLocal - offset)
-  if (guess != offset) {
-// fallback to do the reverse lookup using java.sql.Timestamp
-// this should only happen near the start or end of DST
-val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
-val year = getYear(days)
-val month = getMonth(days)
-val day = getDayOfMonth(days)
-
-var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
-if (millisOfDay < 0) {
-  millisOfDay += MILLIS_PER_DAY.toInt
-}
-val seconds = (millisOfDay / 1000L).toInt
-val hh = seconds / 3600
-val mm = seconds / 60 % 60
-val ss = seconds % 60
-val nano = millisOfDay % 1000 * 100
-
-// create a Timestamp to get the unix timestamp (in UTC)
-val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, 
nano)
-guess = (millisLocal - timestamp.getTime).toInt
-  }
-}
-guess
-  }
-
-  /**
* Returns a timestamp of given timezone from utc timestamp, with the same 
string
* representation in their timezone.
*/
@@ -870,17 +835,7 @@ object DateTimeUtils {
*/
   def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
 val tz = TimeZone.getTimeZone(timeZone)
-val offset = getOffsetFromLocalMillis(time / 1000L, tz)
+val offset = tz.getOffset(time / 1000L)
 time - offset * 1000L
   }
-
-  /**
-   * Re-initialize the current thread's thread locals. Exposed for testing.
-   */
-  private[util] def resetThreadLocals(): Unit = {
-threadLocalGmtCalendar.remove()
-threadLocalLocalTimeZone.remove()
-threadLocalTimestampFormat.remove()
-threadLocalDateFormat.remove()
-  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/3d569d9e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
index 2c96623..1d73e40 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
+++ 

spark git commit: [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time

2016-06-19 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 3f1d730e9 -> 41efd2091


[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight 
Saving Time

## What changes were proposed in this pull request?

Internally, we use an Int to represent a date (the number of days since 1970-01-01). When we
convert that into a unix timestamp (milliseconds since the epoch in UTC), we obtain the
timezone offset using local millis (the milliseconds since 1970-01-01 in that timezone), but
TimeZone.getOffset() expects a unix timestamp, so the result can be off by one hour depending
on whether Daylight Saving Time (DST) is in effect.

This PR changes the lookup to use a best-effort approximation of the POSIX timestamp. Around
a DST change, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and
others map to multiple valid UTC instants (for example, 2016-11-06 01:00:00); this best-effort
approximation should be good enough in practice.
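
To make the failure mode concrete, here is a small standalone sketch of the two lookups
(the timezone and date are chosen for illustration; this mirrors the idea of the fix rather
than Spark's exact code):

```scala
import java.util.{Calendar, TimeZone}

// "Local millis" for 2016-11-06 01:00, a wall-clock time that occurs twice in
// America/Los_Angeles, built by interpreting the calendar fields in GMT.
val gmt = Calendar.getInstance(TimeZone.getTimeZone("GMT"))
gmt.clear()
gmt.set(2016, Calendar.NOVEMBER, 6, 1, 0, 0)
val millisLocal = gmt.getTimeInMillis

val tz = TimeZone.getTimeZone("America/Los_Angeles")

// Buggy lookup: treats local millis as if it were already a UTC instant.
val naiveOffset = tz.getOffset(millisLocal)

// Best-effort lookup: start from the raw offset, then re-query with an
// approximate UTC instant, as the patch does.
var guess = tz.getRawOffset
val offset = tz.getOffset(millisLocal - guess)
if (offset != guess) guess = tz.getOffset(millisLocal - offset)

// Around a DST transition the two can disagree by a full hour (3,600,000 ms).
println(s"naive = $naiveOffset, best-effort = $guess")
```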

## How was this patch tested?

Added regression tests.

Author: Davies Liu 

Closes #13652 from davies/fix_timezone.

(cherry picked from commit 001a58960311b07fe80e2f01e473f4987948d06e)
Signed-off-by: Davies Liu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41efd209
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41efd209
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41efd209

Branch: refs/heads/branch-1.6
Commit: 41efd2091781b31118c6d37be59e4f0f4ae2bf66
Parents: 3f1d730
Author: Davies Liu 
Authored: Sun Jun 19 00:34:52 2016 -0700
Committer: Davies Liu 
Committed: Sun Jun 19 00:35:17 2016 -0700

--
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++--
 .../org/apache/spark/sql/types/DateType.scala   |  2 +-
 .../sql/catalyst/util/DateTimeTestUtils.scala   | 40 +++
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 40 +++
 4 files changed, 129 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/41efd209/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 2b93882..94692d8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -89,8 +89,8 @@ object DateTimeUtils {
 
   // reverse of millisToDays
   def daysToMillis(days: SQLDate): Long = {
-    val millisUtc = days.toLong * MILLIS_PER_DAY
-    millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc)
+    val millisLocal = days.toLong * MILLIS_PER_DAY
+    millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get())
   }
 
   def dateToString(days: SQLDate): String =
@@ -820,6 +820,41 @@ object DateTimeUtils {
   }
 
   /**
+   * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone.
+   */
+  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = {
+    var guess = tz.getRawOffset
+    // the actual offset should be calculated based on milliseconds in UTC
+    val offset = tz.getOffset(millisLocal - guess)
+    if (offset != guess) {
+      guess = tz.getOffset(millisLocal - offset)
+      if (guess != offset) {
+        // fallback to do the reverse lookup using java.sql.Timestamp
+        // this should only happen near the start or end of DST
+        val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
+        val year = getYear(days)
+        val month = getMonth(days)
+        val day = getDayOfMonth(days)
+
+        var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
+        if (millisOfDay < 0) {
+          millisOfDay += MILLIS_PER_DAY.toInt
+        }
+        val seconds = (millisOfDay / 1000L).toInt
+        val hh = seconds / 3600
+        val mm = seconds / 60 % 60
+        val ss = seconds % 60
+        val nano = millisOfDay % 1000 * 100
+
+        // create a Timestamp to get the unix timestamp (in UTC)
+        val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano)
+        guess = (millisLocal - timestamp.getTime).toInt
+      }
+    }
+    guess
+  }
+
+  /**
    * Returns a timestamp of given timezone from utc timestamp, with the same string
    * representation in their timezone.
    */
@@ -835,7 +870,17 @@ object DateTimeUtils {
    */
   def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
     val tz = TimeZone.getTimeZone(timeZone)
-    val offset = 

spark git commit: [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time

2016-06-19 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 ee6eea644 -> dc85bd0a0


[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight 
Saving Time

## What changes were proposed in this pull request?

Internally, we use an Int to represent a date (the number of days since 1970-01-01). When we
convert that into a unix timestamp (milliseconds since the epoch in UTC), we obtain the
timezone offset using local millis (the milliseconds since 1970-01-01 in that timezone), but
TimeZone.getOffset() expects a unix timestamp, so the result can be off by one hour depending
on whether Daylight Saving Time (DST) is in effect.

This PR changes the lookup to use a best-effort approximation of the POSIX timestamp. Around
a DST change, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and
others map to multiple valid UTC instants (for example, 2016-11-06 01:00:00); this best-effort
approximation should be good enough in practice.

## How was this patch tested?

Added regression tests.

Author: Davies Liu 

Closes #13652 from davies/fix_timezone.

(cherry picked from commit 001a58960311b07fe80e2f01e473f4987948d06e)
Signed-off-by: Davies Liu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dc85bd0a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dc85bd0a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dc85bd0a

Branch: refs/heads/branch-2.0
Commit: dc85bd0a08d7a511c667df2cbf27d6c2085c19dc
Parents: ee6eea6
Author: Davies Liu 
Authored: Sun Jun 19 00:34:52 2016 -0700
Committer: Davies Liu 
Committed: Sun Jun 19 00:35:02 2016 -0700

--
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++--
 .../org/apache/spark/sql/types/DateType.scala   |  2 +-
 .../sql/catalyst/util/DateTimeTestUtils.scala   | 40 +++
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 40 +++
 4 files changed, 129 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dc85bd0a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index e08328a..56bf9a7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -100,8 +100,8 @@ object DateTimeUtils {
 
   // reverse of millisToDays
   def daysToMillis(days: SQLDate): Long = {
-val millisUtc = days.toLong * MILLIS_PER_DAY
-millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc)
+val millisLocal = days.toLong * MILLIS_PER_DAY
+millisLocal - getOffsetFromLocalMillis(millisLocal, 
threadLocalLocalTimeZone.get())
   }
 
   def dateToString(days: SQLDate): String =
@@ -851,6 +851,41 @@ object DateTimeUtils {
   }
 
   /**
+   * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in 
given timezone.
+   */
+  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long 
= {
+var guess = tz.getRawOffset
+// the actual offset should be calculated based on milliseconds in UTC
+val offset = tz.getOffset(millisLocal - guess)
+if (offset != guess) {
+  guess = tz.getOffset(millisLocal - offset)
+  if (guess != offset) {
+// fallback to do the reverse lookup using java.sql.Timestamp
+// this should only happen near the start or end of DST
+val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
+val year = getYear(days)
+val month = getMonth(days)
+val day = getDayOfMonth(days)
+
+var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
+if (millisOfDay < 0) {
+  millisOfDay += MILLIS_PER_DAY.toInt
+}
+val seconds = (millisOfDay / 1000L).toInt
+val hh = seconds / 3600
+val mm = seconds / 60 % 60
+val ss = seconds % 60
+val nano = millisOfDay % 1000 * 100
+
+// create a Timestamp to get the unix timestamp (in UTC)
+val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, 
nano)
+guess = (millisLocal - timestamp.getTime).toInt
+  }
+}
+guess
+  }
+
+  /**
* Returns a timestamp of given timezone from utc timestamp, with the same 
string
* representation in their timezone.
*/
@@ -866,7 +901,17 @@ object DateTimeUtils {
*/
   def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
 val tz = TimeZone.getTimeZone(timeZone)
-val offset = 

spark git commit: [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time

2016-06-19 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master ce3b98bae -> 001a58960


[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight 
Saving Time

## What changes were proposed in this pull request?

Internally, we use an Int to represent a date (the number of days since 1970-01-01). When we
convert that into a unix timestamp (milliseconds since the epoch in UTC), we obtain the
timezone offset using local millis (the milliseconds since 1970-01-01 in that timezone), but
TimeZone.getOffset() expects a unix timestamp, so the result can be off by one hour depending
on whether Daylight Saving Time (DST) is in effect.

This PR changes the lookup to use a best-effort approximation of the POSIX timestamp. Around
a DST change, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and
others map to multiple valid UTC instants (for example, 2016-11-06 01:00:00); this best-effort
approximation should be good enough in practice.

## How was this patch tested?

Added regression tests.

Author: Davies Liu 

Closes #13652 from davies/fix_timezone.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/001a5896
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/001a5896
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/001a5896

Branch: refs/heads/master
Commit: 001a58960311b07fe80e2f01e473f4987948d06e
Parents: ce3b98b
Author: Davies Liu 
Authored: Sun Jun 19 00:34:52 2016 -0700
Committer: Davies Liu 
Committed: Sun Jun 19 00:34:52 2016 -0700

--
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++--
 .../org/apache/spark/sql/types/DateType.scala   |  2 +-
 .../sql/catalyst/util/DateTimeTestUtils.scala   | 40 +++
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 40 +++
 4 files changed, 129 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/001a5896/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index e08328a..56bf9a7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -100,8 +100,8 @@ object DateTimeUtils {
 
   // reverse of millisToDays
   def daysToMillis(days: SQLDate): Long = {
-val millisUtc = days.toLong * MILLIS_PER_DAY
-millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc)
+val millisLocal = days.toLong * MILLIS_PER_DAY
+millisLocal - getOffsetFromLocalMillis(millisLocal, 
threadLocalLocalTimeZone.get())
   }
 
   def dateToString(days: SQLDate): String =
@@ -851,6 +851,41 @@ object DateTimeUtils {
   }
 
   /**
+   * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in 
given timezone.
+   */
+  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long 
= {
+var guess = tz.getRawOffset
+// the actual offset should be calculated based on milliseconds in UTC
+val offset = tz.getOffset(millisLocal - guess)
+if (offset != guess) {
+  guess = tz.getOffset(millisLocal - offset)
+  if (guess != offset) {
+// fallback to do the reverse lookup using java.sql.Timestamp
+// this should only happen near the start or end of DST
+val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
+val year = getYear(days)
+val month = getMonth(days)
+val day = getDayOfMonth(days)
+
+var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
+if (millisOfDay < 0) {
+  millisOfDay += MILLIS_PER_DAY.toInt
+}
+val seconds = (millisOfDay / 1000L).toInt
+val hh = seconds / 3600
+val mm = seconds / 60 % 60
+val ss = seconds % 60
+val nano = millisOfDay % 1000 * 100
+
+// create a Timestamp to get the unix timestamp (in UTC)
+val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, 
nano)
+guess = (millisLocal - timestamp.getTime).toInt
+  }
+}
+guess
+  }
+
+  /**
* Returns a timestamp of given timezone from utc timestamp, with the same 
string
* representation in their timezone.
*/
@@ -866,7 +901,17 @@ object DateTimeUtils {
*/
   def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
 val tz = TimeZone.getTimeZone(timeZone)
-val offset = tz.getOffset(time / 1000L)
+val offset = getOffsetFromLocalMillis(time / 1000L, tz)
 time - offset * 1000L
   }
+
+