spark git commit: [SPARK-16726][SQL] Improve `Union/Intersect/Except` error messages on incompatible types

2016-08-01 Thread hvanhovell
Repository: spark
Updated Branches:
  refs/heads/master 579fbcf3b -> 64d8f37c7


[SPARK-16726][SQL] Improve `Union/Intersect/Except` error messages on 
incompatible types

## What changes were proposed in this pull request?

Currently, `UNION` queries on incompatible types show a misleading error 
message, i.e., `unresolved operator Union`. We should show a clearer message 
instead. This will help users in the situation of 
[SPARK-16704](https://issues.apache.org/jira/browse/SPARK-16704).

**Before**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Union;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Intersect;
scala> sql("select 1,2,3 except (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Except;
```

**After**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Union can only be performed on tables 
with the compatible column types. ArrayType(IntegerType,false) <> IntegerType 
at the second column of the second table;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Intersect can only be performed on 
tables with the compatible column types. ArrayType(IntegerType,false) <> 
IntegerType at the second column of the second table;
scala> sql("select 1,2,3 except (select array(1),array(2),3)")
org.apache.spark.sql.AnalysisException: Except can only be performed on tables 
with the compatible column types. ArrayType(IntegerType,false) <> IntegerType 
at the first column of the second table;
```
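
For reference, here is a simplified, standalone sketch of the per-column check behind 
the new message. It mirrors the `dataTypes`/`ordinalNumber` helpers added in the diff 
below, but it is not the actual `CheckAnalysis` code, and it uses strict type equality 
where Spark applies its own type-compatibility test:

```scala
import org.apache.spark.sql.types._

// Simplified sketch: compare each table's column types against the first
// table's and report the first mismatch with its ordinal positions.
def ordinalNumber(i: Int): String = i match {
  case 0 => "first"
  case 1 => "second"
  case i => s"${i}th"
}

def firstIncompatibility(operation: String, tables: Seq[Seq[DataType]]): Option[String] = {
  val reference = tables.head
  tables.zipWithIndex.drop(1).flatMap { case (columns, tableIndex) =>
    columns.zip(reference).zipWithIndex.collectFirst {
      // Spark's real check uses type compatibility rather than strict equality.
      case ((dt1, dt2), columnIndex) if dt1 != dt2 =>
        s"$operation can only be performed on tables with the compatible column types. " +
          s"$dt1 <> $dt2 at the ${ordinalNumber(columnIndex)} column of the " +
          s"${ordinalNumber(tableIndex)} table"
    }
  }.headOption
}

// firstIncompatibility("Union",
//   Seq(Seq(IntegerType, IntegerType, IntegerType),
//       Seq(IntegerType, ArrayType(IntegerType, false), IntegerType)))
// => Some("... at the second column of the second table")
```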

## How was this patch tested?

Pass the Jenkins test with a new test case.

Author: Dongjoon Hyun 

Closes #14355 from dongjoon-hyun/SPARK-16726.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64d8f37c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64d8f37c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64d8f37c

Branch: refs/heads/master
Commit: 64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d
Parents: 579fbcf
Author: Dongjoon Hyun 
Authored: Mon Aug 1 11:12:58 2016 +0200
Committer: Herman van Hovell 
Committed: Mon Aug 1 11:12:58 2016 +0200

--
 .../sql/catalyst/analysis/CheckAnalysis.scala   | 44 ++--
 .../catalyst/analysis/AnalysisErrorSuite.scala  | 15 +++
 2 files changed, 46 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/64d8f37c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 8b87a4e..41b7e62 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -253,19 +253,6 @@ trait CheckAnalysis extends PredicateHelper {
   }
 }
 
-  case s @ SetOperation(left, right) if left.output.length != 
right.output.length =>
-failAnalysis(
-  s"${s.nodeName} can only be performed on tables with the same 
number of columns, " +
-s"but the left table has ${left.output.length} columns and the 
right has " +
-s"${right.output.length}")
-
-  case s: Union if s.children.exists(_.output.length != 
s.children.head.output.length) =>
-val firstError = s.children.find(_.output.length != 
s.children.head.output.length).get
-failAnalysis(
-  s"Unions can only be performed on tables with the same number of 
columns, " +
-s"but one table has '${firstError.output.length}' columns and 
another table has " +
-s"'${s.children.head.output.length}' columns")
-
   case GlobalLimit(limitExpr, _) => checkLimitClause(limitExpr)
 
   case LocalLimit(limitExpr, _) => checkLimitClause(limitExpr)
@@ -280,6 +267,37 @@ trait CheckAnalysis extends PredicateHelper {
   case p if 
p.expressions.exists(PredicateSubquery.hasPredicateSubquery) =>
 failAnalysis(s"Predicate sub-queries can only be used in a Filter: 
$p")
 
+  case _: Union | _: SetOperation if operator.children.length > 1 =>
+def dataTypes(plan: LogicalPlan): Seq[DataType] = 
plan.output.map(_.dataType)
+def ordinalNumber(i: Int): String = i match {
+  case 0 => "first"
+  case 1 => "second"
+  case i => s"${i}th"
+}
+   

spark git commit: [SPARK-16485][DOC][ML] Remove useless LaTeX in a log message.

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 64d8f37c7 -> 2a0de7dc9


[SPARK-16485][DOC][ML] Remove useless LaTeX in a log message.

## What changes were proposed in this pull request?

Removed useless LaTeX in a log message.

## How was this patch tested?

Check generated scaladoc.

Author: Shuai Lin 

Closes #14380 from lins05/fix-docs-formatting.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a0de7dc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a0de7dc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a0de7dc

Branch: refs/heads/master
Commit: 2a0de7dc995844984d7dbb4238418967c6bbac70
Parents: 64d8f37
Author: Shuai Lin 
Authored: Mon Aug 1 06:54:18 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 06:54:18 2016 -0700

--
 .../org/apache/spark/mllib/optimization/GradientDescentSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2a0de7dc/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
index 1c9b7c7..37eb794 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
@@ -131,7 +131,7 @@ class GradientDescentSuite extends SparkFunSuite with 
MLlibTestSparkContext with
 assert(
   loss1(0) ~= (loss0(0) + (math.pow(initialWeightsWithIntercept(0), 2) +
 math.pow(initialWeightsWithIntercept(1), 2)) / 2) absTol 1E-5,
-  """For non-zero weights, the regVal should be \frac{1}{2}\sum_i 
w_i^2.""")
+  """For non-zero weights, the regVal should be 0.5 * sum(w_i ^ 2).""")
 
 assert(
   (newWeights1(0) ~= (newWeights0(0) - initialWeightsWithIntercept(0)) 
absTol 1E-5) &&





spark git commit: [SPARK-16778][SQL][TRIVIAL] Fix deprecation warning with SQLContext

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c651ff53a -> 4bdc55898


[SPARK-16778][SQL][TRIVIAL] Fix deprecation warning with SQLContext

## What changes were proposed in this pull request?

Change to non-deprecated constructor for SQLContext.

## How was this patch tested?

Existing tests
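
As a side note, a minimal sketch of the non-deprecated route for user code, assuming a 
throwaway local session (the test itself, shown in the diff below, builds the SQLContext 
from a SparkSession over an existing SparkContext):

```scala
import org.apache.spark.sql.SparkSession

// Obtain the SQLContext through a SparkSession rather than the deprecated
// `new SQLContext(sparkContext)` constructor.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("sqlcontext-sketch")
  .getOrCreate()

val sqlContext = spark.sqlContext  // backed by the same underlying SparkSession
println(sqlContext.getConf("spark.sql.shuffle.partitions"))
```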

Author: Holden Karau 

Closes #14406 from 
holdenk/SPARK-16778-fix-use-of-deprecated-SQLContext-constructor.

(cherry picked from commit 1e9b59b73bdb8aacf5a85e0eed29efc6485a3bc3)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4bdc5589
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4bdc5589
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4bdc5589

Branch: refs/heads/branch-2.0
Commit: 4bdc558989ef4a9490ca42e7330c10136151134b
Parents: c651ff5
Author: Holden Karau 
Authored: Mon Aug 1 06:55:31 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 06:55:47 2016 -0700

--
 .../test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4bdc5589/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
index 2cd3f47..5d34804 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
@@ -28,7 +28,7 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
   test("propagate from spark conf") {
 // We create a new context here to avoid order dependence with other tests 
that might call
 // clear().
-val newContext = new SQLContext(sparkContext)
+val newContext = new 
SQLContext(SparkSession.builder().sparkContext(sparkContext).getOrCreate())
 assert(newContext.getConf("spark.sql.testkey", "false") === "true")
   }
 





spark git commit: [SPARK-16778][SQL][TRIVIAL] Fix deprecation warning with SQLContext

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 2a0de7dc9 -> 1e9b59b73


[SPARK-16778][SQL][TRIVIAL] Fix deprecation warning with SQLContext

## What changes were proposed in this pull request?

Change to non-deprecated constructor for SQLContext.

## How was this patch tested?

Existing tests

Author: Holden Karau 

Closes #14406 from 
holdenk/SPARK-16778-fix-use-of-deprecated-SQLContext-constructor.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1e9b59b7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1e9b59b7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1e9b59b7

Branch: refs/heads/master
Commit: 1e9b59b73bdb8aacf5a85e0eed29efc6485a3bc3
Parents: 2a0de7d
Author: Holden Karau 
Authored: Mon Aug 1 06:55:31 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 06:55:31 2016 -0700

--
 .../test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1e9b59b7/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
index 4454cad..7424e17 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
@@ -28,7 +28,7 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
   test("propagate from spark conf") {
 // We create a new context here to avoid order dependence with other tests 
that might call
 // clear().
-val newContext = new SQLContext(sparkContext)
+val newContext = new 
SQLContext(SparkSession.builder().sparkContext(sparkContext).getOrCreate())
 assert(newContext.getConf("spark.sql.testkey", "false") === "true")
   }
 





spark git commit: [SPARK-16776][STREAMING] Replace deprecated API in KafkaTestUtils for 0.10.0.

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4bdc55898 -> b49091e10


[SPARK-16776][STREAMING] Replace deprecated API in KafkaTestUtils for 0.10.0.

## What changes were proposed in this pull request?

This PR replaces the old Kafka APIs with the 0.10.0 ones in `KafkaTestUtils`.

The changes include:

 - `Producer` to `KafkaProducer`
 - Changing configurations to equivalent ones. (I referred 
[here](http://kafka.apache.org/documentation.html#producerconfigs) for 0.10.0 
and [here](http://kafka.apache.org/082/documentation.html#producerconfigs) for 
the old 0.8.2.)

This PR removes the build warnings shown below:

```scala
[WARNING] 
.../spark/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala:71:
 class Producer in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.KafkaProducer instead.
[WARNING]   private var producer: Producer[String, String] = _
[WARNING] ^
[WARNING] 
.../spark/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala:181:
 class Producer in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.KafkaProducer instead.
[WARNING] producer = new Producer[String, String](new 
ProducerConfig(producerConfiguration))
[WARNING]^
[WARNING] .../spark/streaming/kafka010/KafkaTestUtils.scala:181: class 
ProducerConfig in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.ProducerConfig instead.
[WARNING] producer = new Producer[String, String](new 
ProducerConfig(producerConfiguration))
[WARNING] ^
[WARNING] 
.../spark/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala:182:
 class KeyedMessage in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.ProducerRecord instead.
[WARNING] producer.send(messages.map { new KeyedMessage[String, 
String](topic, _ ) }: _*)
[WARNING]  ^
[WARNING] four warnings found
[WARNING] warning: [options] bootstrap class path not set in conjunction with 
-source 1.7
[WARNING] 1 warning
```

## How was this patch tested?

Existing tests that use `KafkaTestUtils` should cover this.
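
For illustration, a minimal sketch of the 0.10.0-style producer API that `KafkaTestUtils` 
migrates to; the broker address and topic are placeholders:

```scala
import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

// Configuration keys follow the new producer config names.
val props = new Properties()
props.put("bootstrap.servers", "localhost:9092")
props.put("key.serializer", classOf[StringSerializer].getName)
props.put("value.serializer", classOf[StringSerializer].getName)

// KafkaProducer/ProducerRecord replace the deprecated Producer/KeyedMessage.
val producer = new KafkaProducer[String, String](props)
producer.send(new ProducerRecord[String, String]("test-topic", "hello"))
producer.close()
```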

Author: hyukjinkwon 

Closes #14416 from HyukjinKwon/SPARK-16776.

(cherry picked from commit f93ad4fe7c9728c8dd67a8095de3d39fad21d03f)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b49091e1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b49091e1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b49091e1

Branch: refs/heads/branch-2.0
Commit: b49091e10100dfbefeabdd2dfe0b64cdf613a052
Parents: 4bdc558
Author: hyukjinkwon 
Authored: Mon Aug 1 06:56:52 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 06:57:09 2016 -0700

--
 .../streaming/kafka010/KafkaTestUtils.scala | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b49091e1/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
--
diff --git 
a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
 
b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
index 19192e4b..ecabe1c 100644
--- 
a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
+++ 
b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
@@ -30,10 +30,10 @@ import scala.util.control.NonFatal
 
 import kafka.admin.AdminUtils
 import kafka.api.Request
-import kafka.producer.{KeyedMessage, Producer, ProducerConfig}
-import kafka.serializer.StringEncoder
 import kafka.server.{KafkaConfig, KafkaServer}
 import kafka.utils.ZkUtils
+import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
+import org.apache.kafka.common.serialization.StringSerializer
 import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer}
 
 import org.apache.spark.SparkConf
@@ -68,7 +68,7 @@ private[kafka010] class KafkaTestUtils extends Logging {
   private var server: KafkaServer = _
 
   // Kafka producer
-  private var producer: Producer[String, String] = _
+  private var producer: KafkaProducer[String, String] = _
 
   // Flag to test whether the system

spark git commit: [SPARK-16776][STREAMING] Replace deprecated API in KafkaTestUtils for 0.10.0.

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 1e9b59b73 -> f93ad4fe7


[SPARK-16776][STREAMING] Replace deprecated API in KafkaTestUtils for 0.10.0.

## What changes were proposed in this pull request?

This PR replaces the old Kafka APIs with the 0.10.0 ones in `KafkaTestUtils`.

The changes include:

 - `Producer` to `KafkaProducer`
 - Changing configurations to equivalent ones. (I referred 
[here](http://kafka.apache.org/documentation.html#producerconfigs) for 0.10.0 
and [here](http://kafka.apache.org/082/documentation.html#producerconfigs) for 
the old 0.8.2.)

This PR removes the build warnings shown below:

```scala
[WARNING] 
.../spark/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala:71:
 class Producer in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.KafkaProducer instead.
[WARNING]   private var producer: Producer[String, String] = _
[WARNING] ^
[WARNING] 
.../spark/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala:181:
 class Producer in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.KafkaProducer instead.
[WARNING] producer = new Producer[String, String](new 
ProducerConfig(producerConfiguration))
[WARNING]^
[WARNING] .../spark/streaming/kafka010/KafkaTestUtils.scala:181: class 
ProducerConfig in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.ProducerConfig instead.
[WARNING] producer = new Producer[String, String](new 
ProducerConfig(producerConfiguration))
[WARNING] ^
[WARNING] 
.../spark/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala:182:
 class KeyedMessage in package producer is deprecated: This class has been 
deprecated and will be removed in a future release. Please use 
org.apache.kafka.clients.producer.ProducerRecord instead.
[WARNING] producer.send(messages.map { new KeyedMessage[String, 
String](topic, _ ) }: _*)
[WARNING]  ^
[WARNING] four warnings found
[WARNING] warning: [options] bootstrap class path not set in conjunction with 
-source 1.7
[WARNING] 1 warning
```

## How was this patch tested?

Existing tests that use `KafkaTestUtils` should cover this.

Author: hyukjinkwon 

Closes #14416 from HyukjinKwon/SPARK-16776.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f93ad4fe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f93ad4fe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f93ad4fe

Branch: refs/heads/master
Commit: f93ad4fe7c9728c8dd67a8095de3d39fad21d03f
Parents: 1e9b59b
Author: hyukjinkwon 
Authored: Mon Aug 1 06:56:52 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 06:56:52 2016 -0700

--
 .../streaming/kafka010/KafkaTestUtils.scala | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f93ad4fe/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
--
diff --git 
a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
 
b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
index 19192e4b..ecabe1c 100644
--- 
a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
+++ 
b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala
@@ -30,10 +30,10 @@ import scala.util.control.NonFatal
 
 import kafka.admin.AdminUtils
 import kafka.api.Request
-import kafka.producer.{KeyedMessage, Producer, ProducerConfig}
-import kafka.serializer.StringEncoder
 import kafka.server.{KafkaConfig, KafkaServer}
 import kafka.utils.ZkUtils
+import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
+import org.apache.kafka.common.serialization.StringSerializer
 import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer}
 
 import org.apache.spark.SparkConf
@@ -68,7 +68,7 @@ private[kafka010] class KafkaTestUtils extends Logging {
   private var server: KafkaServer = _
 
   // Kafka producer
-  private var producer: Producer[String, String] = _
+  private var producer: KafkaProducer[String, String] = _
 
   // Flag to test whether the system is correctly started
   private var zkReady = false
@@ -178,8 +178,10 @@ private[kafka010] class KafkaT

spark git commit: [SPARK-16791][SQL] cast struct with timestamp field fails

2016-08-01 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master f93ad4fe7 -> 338a98d65


[SPARK-16791][SQL] cast struct with timestamp field fails

## What changes were proposed in this pull request?
a failing test case + fix to SPARK-16791 
(https://issues.apache.org/jira/browse/SPARK-16791)

## How was this patch tested?
Added a failing test case to CastSuite, then fixed the Cast code and reran the 
entire CastSuite.
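
A hypothetical user-level reproduction (column and field names are illustrative; the 
actual test added here works at the Catalyst expression level, as the diff below shows):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

val spark = SparkSession.builder().master("local[*]").appName("cast-struct").getOrCreate()

// Build a struct column containing a timestamp field, then cast it to an
// equivalent struct type (e.g. to relax field nullability). Before the fix,
// the nested identity cast compared against the outer struct's type and failed.
val df = spark.range(1).select(struct(current_timestamp().as("tsField")).as("s"))
df.select(col("s").cast("struct<tsField:timestamp>")).show(truncate = false)
```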

Author: eyal farago 
Author: Eyal Farago 

Closes #14400 from eyalfa/SPARK-16791_cast_struct_with_timestamp_field_fails.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/338a98d6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/338a98d6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/338a98d6

Branch: refs/heads/master
Commit: 338a98d65c8efe0c41f39a8dddeab7040dcda125
Parents: f93ad4f
Author: eyal farago 
Authored: Mon Aug 1 22:43:32 2016 +0800
Committer: Wenchen Fan 
Committed: Mon Aug 1 22:43:32 2016 +0800

--
 .../org/apache/spark/sql/catalyst/expressions/Cast.scala  |  2 +-
 .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/338a98d6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c452765..70fff51 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -416,7 +416,7 @@ case class Cast(child: Expression, dataType: DataType) 
extends UnaryExpression w
   }
 
   private[this] def cast(from: DataType, to: DataType): Any => Any = to match {
-case dt if dt == child.dataType => identity[Any]
+case dt if dt == from => identity[Any]
 case StringType => castToString(from)
 case BinaryType => castToBinary(from)
 case DateType => castToDate(from)

http://git-wip-us.apache.org/repos/asf/spark/blob/338a98d6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 5ae0527..5c35baa 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -727,6 +727,16 @@ class CastSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 }
   }
 
+  test("cast struct with a timestamp field") {
+val originalSchema = new StructType().add("tsField", TimestampType, 
nullable = false)
+// nine out of ten times I'm casting a struct, it's to normalize its 
fields nullability
+val targetSchema = new StructType().add("tsField", TimestampType, nullable 
= true)
+
+val inp = Literal.create(InternalRow(0L), originalSchema)
+val expected = InternalRow(0L)
+checkEvaluation(cast(inp, targetSchema), expected)
+  }
+
   test("complex casting") {
 val complex = Literal.create(
   Row(





spark git commit: [SPARK-16791][SQL] cast struct with timestamp field fails

2016-08-01 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 b49091e10 -> 1523bf69a


[SPARK-16791][SQL] cast struct with timestamp field fails

## What changes were proposed in this pull request?
a failing test case + fix to SPARK-16791 
(https://issues.apache.org/jira/browse/SPARK-16791)

## How was this patch tested?
Added a failing test case to CastSuite, then fixed the Cast code and reran the 
entire CastSuite.

Author: eyal farago 
Author: Eyal Farago 

Closes #14400 from eyalfa/SPARK-16791_cast_struct_with_timestamp_field_fails.

(cherry picked from commit 338a98d65c8efe0c41f39a8dddeab7040dcda125)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1523bf69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1523bf69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1523bf69

Branch: refs/heads/branch-2.0
Commit: 1523bf69a0ef87f36b0b3995ce2d7a33aaff6046
Parents: b49091e
Author: eyal farago 
Authored: Mon Aug 1 22:43:32 2016 +0800
Committer: Wenchen Fan 
Committed: Mon Aug 1 22:44:23 2016 +0800

--
 .../org/apache/spark/sql/catalyst/expressions/Cast.scala  |  2 +-
 .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1523bf69/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c452765..70fff51 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -416,7 +416,7 @@ case class Cast(child: Expression, dataType: DataType) 
extends UnaryExpression w
   }
 
   private[this] def cast(from: DataType, to: DataType): Any => Any = to match {
-case dt if dt == child.dataType => identity[Any]
+case dt if dt == from => identity[Any]
 case StringType => castToString(from)
 case BinaryType => castToBinary(from)
 case DateType => castToDate(from)

http://git-wip-us.apache.org/repos/asf/spark/blob/1523bf69/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 5ae0527..5c35baa 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -727,6 +727,16 @@ class CastSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 }
   }
 
+  test("cast struct with a timestamp field") {
+val originalSchema = new StructType().add("tsField", TimestampType, 
nullable = false)
+// nine out of ten times I'm casting a struct, it's to normalize its 
fields nullability
+val targetSchema = new StructType().add("tsField", TimestampType, nullable 
= true)
+
+val inp = Literal.create(InternalRow(0L), originalSchema)
+val expected = InternalRow(0L)
+checkEvaluation(cast(inp, targetSchema), expected)
+  }
+
   test("complex casting") {
 val complex = Literal.create(
   Row(





spark-website git commit: Fix main links to streaming examples

2016-08-01 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 5e869909e -> 612383440


Fix main links to streaming examples


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/61238344
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/61238344
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/61238344

Branch: refs/heads/asf-site
Commit: 612383440bfa727a9980cf83460b8f32df8d666c
Parents: 5e86990
Author: Isaac Levy 
Authored: Sat Jul 30 10:29:20 2016 -0400
Committer: Isaac Levy 
Committed: Sat Jul 30 10:29:20 2016 -0400

--
 site/streaming/index.html | 2 +-
 streaming/index.md| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/61238344/site/streaming/index.html
--
diff --git a/site/streaming/index.html b/site/streaming/index.html
index ddd5c33..85f9c8b 100644
--- a/site/streaming/index.html
+++ b/site/streaming/index.html
@@ -307,7 +307,7 @@
 
   Download Spark. It includes Streaming 
as a module.
   Read the Spark Streaming 
programming guide, which includes a tutorial and describes system 
architecture, configuration and high availability.
-  Check out example programs in https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/org/apache/spark/streaming/examples";>Scala
 and https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/org/apache/spark/streaming/examples";>Java.
+  Check out example programs in https://github.com/apache/spark/tree/master/examples/src/main/scala/org/apache/spark/examples/streaming";>Scala
 and https://github.com/apache/spark/tree/master/examples/src/main/java/org/apache/spark/examples/streaming";>Java.
 
   
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/61238344/streaming/index.md
--
diff --git a/streaming/index.md b/streaming/index.md
index 691ba36..459d84f 100644
--- a/streaming/index.md
+++ b/streaming/index.md
@@ -130,7 +130,7 @@ subproject: Streaming
 
   Download Spark. It includes 
Streaming as a module.
   Read the Spark Streaming 
programming guide, which includes a tutorial and describes system 
architecture, configuration and high availability.
-  Check out example programs in https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/org/apache/spark/streaming/examples";>Scala
 and https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/org/apache/spark/streaming/examples";>Java.
+  Check out example programs in https://github.com/apache/spark/tree/master/examples/src/main/scala/org/apache/spark/examples/streaming";>Scala
 and https://github.com/apache/spark/tree/master/examples/src/main/java/org/apache/spark/examples/streaming";>Java.
 
   
 





spark git commit: [SPARK-16774][SQL] Fix use of deprecated timestamp constructor & improve timezone handling

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 338a98d65 -> ab1e761f9


[SPARK-16774][SQL] Fix use of deprecated timestamp constructor & improve 
timezone handling

## What changes were proposed in this pull request?

Removes use of the deprecated timestamp constructor and incidentally fixes a 
call site that used the system timezone rather than the specified one when 
working near DST.

This change also causes the round-trip tests to fail, since they now actually 
exercise all the timezones near DST boundaries where they didn't before.

Note: this is only a partial solution; longer term we should follow up with 
https://issues.apache.org/jira/browse/SPARK-16788 to avoid this problem and 
simplify our timezone handling code.

## How was this patch tested?

New tests for two timezones were added, so even if the user's timezone happens 
to coincide with one of them, the other tests should still fail. Important note: 
this (temporarily) disables the round-trip tests until we can fix the issue more 
thoroughly.
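
For reference, a minimal standalone sketch (a hypothetical helper, not Spark code) of the 
Calendar-based conversion the fix switches to, which honors an explicit timezone instead of 
the deprecated `java.sql.Timestamp` constructor that uses the JVM default:

```scala
import java.util.{Calendar, TimeZone}

// Compute epoch millis for a wall-clock time in an explicit timezone.
def localTimeToMillis(
    year: Int, month: Int, day: Int,
    hour: Int, minute: Int, second: Int, millis: Int,
    tz: TimeZone): Long = {
  val calendar = Calendar.getInstance(tz)
  calendar.clear()                                           // drop the "now" fields
  calendar.set(year, month - 1, day, hour, minute, second)   // Calendar months are 0-based
  calendar.set(Calendar.MILLISECOND, millis)
  calendar.getTimeInMillis
}

// localTimeToMillis(2016, 8, 1, 12, 0, 0, 0, TimeZone.getTimeZone("America/Los_Angeles"))
```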

Author: Holden Karau 

Closes #14398 from 
holdenk/SPARK-16774-fix-use-of-deprecated-timestamp-constructor.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ab1e761f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ab1e761f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ab1e761f

Branch: refs/heads/master
Commit: ab1e761f9691b41385e2ed2202c5a671c63c963d
Parents: 338a98d
Author: Holden Karau 
Authored: Mon Aug 1 13:57:05 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 13:57:05 2016 -0700

--
 .../spark/sql/catalyst/util/DateTimeUtils.scala   | 14 --
 .../spark/sql/catalyst/util/DateTimeUtilsSuite.scala  |  3 ++-
 2 files changed, 10 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ab1e761f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index df480a1..0b643a5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -852,8 +852,10 @@ object DateTimeUtils {
 
   /**
* Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in 
given timezone.
+   * TODO: Improve handling of normalization differences.
+   * TODO: Replace with JSR-310 or similar system - see SPARK-16788
*/
-  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long 
= {
+  private[sql] def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): 
Long = {
 var guess = tz.getRawOffset
 // the actual offset should be calculated based on milliseconds in UTC
 val offset = tz.getOffset(millisLocal - guess)
@@ -875,11 +877,11 @@ object DateTimeUtils {
 val hh = seconds / 3600
 val mm = seconds / 60 % 60
 val ss = seconds % 60
-val nano = millisOfDay % 1000 * 100
-
-// create a Timestamp to get the unix timestamp (in UTC)
-val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, 
nano)
-guess = (millisLocal - timestamp.getTime).toInt
+val ms = millisOfDay % 1000
+val calendar = Calendar.getInstance(tz)
+calendar.set(year, month - 1, day, hh, mm, ss)
+calendar.set(Calendar.MILLISECOND, ms)
+guess = (millisLocal - calendar.getTimeInMillis()).toInt
   }
 }
 guess

http://git-wip-us.apache.org/repos/asf/spark/blob/ab1e761f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 059a5b7..4f516d0 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -551,7 +551,8 @@ class DateTimeUtilsSuite extends SparkFunSuite {
 val skipped = skipped_days.getOrElse(tz.getID, Int.MinValue)
 (-2 to 2).foreach { d =>
   if (d != skipped) {
-assert(millisToDays(daysToMillis(d)) === d)
+assert(millisToDays(daysToMillis(d)) === d,
+  s"Round trip of ${d} did not work in tz ${tz}")
   }
 }
   }



spark git commit: [SPARK-16774][SQL] Fix use of deprecated timestamp constructor & improve timezone handling

2016-08-01 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 1523bf69a -> 4e73cb8eb


[SPARK-16774][SQL] Fix use of deprecated timestamp constructor & improve 
timezone handling

## What changes were proposed in this pull request?

Removes use of the deprecated timestamp constructor and incidentally fixes a 
call site that used the system timezone rather than the specified one when 
working near DST.

This change also causes the round-trip tests to fail, since they now actually 
exercise all the timezones near DST boundaries where they didn't before.

Note: this is only a partial solution; longer term we should follow up with 
https://issues.apache.org/jira/browse/SPARK-16788 to avoid this problem and 
simplify our timezone handling code.

## How was this patch tested?

New tests for two timezones were added, so even if the user's timezone happens 
to coincide with one of them, the other tests should still fail. Important note: 
this (temporarily) disables the round-trip tests until we can fix the issue more 
thoroughly.

Author: Holden Karau 

Closes #14398 from 
holdenk/SPARK-16774-fix-use-of-deprecated-timestamp-constructor.

(cherry picked from commit ab1e761f9691b41385e2ed2202c5a671c63c963d)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e73cb8e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e73cb8e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e73cb8e

Branch: refs/heads/branch-2.0
Commit: 4e73cb8ebdb0dcb1be4dce562bac9214e9905b8e
Parents: 1523bf6
Author: Holden Karau 
Authored: Mon Aug 1 13:57:05 2016 -0700
Committer: Sean Owen 
Committed: Mon Aug 1 13:57:19 2016 -0700

--
 .../spark/sql/catalyst/util/DateTimeUtils.scala   | 14 --
 .../spark/sql/catalyst/util/DateTimeUtilsSuite.scala  |  3 ++-
 2 files changed, 10 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4e73cb8e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index df480a1..0b643a5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -852,8 +852,10 @@ object DateTimeUtils {
 
   /**
* Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in 
given timezone.
+   * TODO: Improve handling of normalization differences.
+   * TODO: Replace with JSR-310 or similar system - see SPARK-16788
*/
-  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long 
= {
+  private[sql] def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): 
Long = {
 var guess = tz.getRawOffset
 // the actual offset should be calculated based on milliseconds in UTC
 val offset = tz.getOffset(millisLocal - guess)
@@ -875,11 +877,11 @@ object DateTimeUtils {
 val hh = seconds / 3600
 val mm = seconds / 60 % 60
 val ss = seconds % 60
-val nano = millisOfDay % 1000 * 100
-
-// create a Timestamp to get the unix timestamp (in UTC)
-val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, 
nano)
-guess = (millisLocal - timestamp.getTime).toInt
+val ms = millisOfDay % 1000
+val calendar = Calendar.getInstance(tz)
+calendar.set(year, month - 1, day, hh, mm, ss)
+calendar.set(Calendar.MILLISECOND, ms)
+guess = (millisLocal - calendar.getTimeInMillis()).toInt
   }
 }
 guess

http://git-wip-us.apache.org/repos/asf/spark/blob/4e73cb8e/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 059a5b7..4f516d0 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -551,7 +551,8 @@ class DateTimeUtilsSuite extends SparkFunSuite {
 val skipped = skipped_days.getOrElse(tz.getID, Int.MinValue)
 (-2 to 2).foreach { d =>
   if (d != skipped) {
-assert(millisToDays(daysToMillis(d)) === d)
+assert(millisToDays(daysToMillis(d)) === d,
+  s"Round trip of ${d} did not work in tz ${tz}")
   }
 }
   }



spark git commit: [SPARK-15869][STREAMING] Fix a potential NPE in StreamingJobProgressListener.getBatchUIData

2016-08-01 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master ab1e761f9 -> 03d46aafe


[SPARK-15869][STREAMING] Fix a potential NPE in 
StreamingJobProgressListener.getBatchUIData

## What changes were proposed in this pull request?

Moved `asScala` into a `map` to avoid an NPE when the map lookup returns null.
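
A minimal sketch of the pattern; the map name and value type are made up:

```scala
import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConverters._

val pairs = new ConcurrentHashMap[Long, java.util.List[Int]]()  // get() returns null for a missing key

// Before: Option(pairs.get(42L).asScala) may not become None for a missing key,
// because asScala can wrap the null reference, so an NPE surfaces later when
// the resulting collection is used.
// val ids = Option(pairs.get(42L).asScala).getOrElse(Seq.empty)

// After: wrap the possibly-null value first, then convert inside map, so a
// missing key cleanly falls back to Seq.empty.
val ids = Option(pairs.get(42L)).map(_.asScala).getOrElse(Seq.empty)
```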

## How was this patch tested?

Existing unit tests.

Author: Shixiong Zhu 

Closes #14443 from zsxwing/SPARK-15869.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/03d46aaf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/03d46aaf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/03d46aaf

Branch: refs/heads/master
Commit: 03d46aafe561b03e25f4e25cf01e631c18dd827c
Parents: ab1e761
Author: Shixiong Zhu 
Authored: Mon Aug 1 14:41:22 2016 -0700
Committer: Shixiong Zhu 
Committed: Mon Aug 1 14:41:22 2016 -0700

--
 .../apache/spark/streaming/ui/StreamingJobProgressListener.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/03d46aaf/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
--
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
index c086df4..61f852a 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
@@ -259,7 +259,7 @@ private[streaming] class StreamingJobProgressListener(ssc: 
StreamingContext)
   // We use an Iterable rather than explicitly converting to a seq so that 
updates
   // will propagate
   val outputOpIdToSparkJobIds: Iterable[OutputOpIdAndSparkJobId] =
-Option(batchTimeToOutputOpIdSparkJobIdPair.get(batchTime).asScala)
+
Option(batchTimeToOutputOpIdSparkJobIdPair.get(batchTime)).map(_.asScala)
   .getOrElse(Seq.empty)
   _batchUIData.outputOpIdSparkJobIdPairs = outputOpIdToSparkJobIds
 }





spark git commit: [SPARK-15869][STREAMING] Fix a potential NPE in StreamingJobProgressListener.getBatchUIData

2016-08-01 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4e73cb8eb -> 1813bbd9b


[SPARK-15869][STREAMING] Fix a potential NPE in 
StreamingJobProgressListener.getBatchUIData

## What changes were proposed in this pull request?

Moved `asScala` into a `map` to avoid an NPE when the map lookup returns null.

## How was this patch tested?

Existing unit tests.

Author: Shixiong Zhu 

Closes #14443 from zsxwing/SPARK-15869.

(cherry picked from commit 03d46aafe561b03e25f4e25cf01e631c18dd827c)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1813bbd9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1813bbd9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1813bbd9

Branch: refs/heads/branch-2.0
Commit: 1813bbd9bf7cb9afd29e1385f0dc52e8fcc4f132
Parents: 4e73cb8
Author: Shixiong Zhu 
Authored: Mon Aug 1 14:41:22 2016 -0700
Committer: Shixiong Zhu 
Committed: Mon Aug 1 14:41:34 2016 -0700

--
 .../apache/spark/streaming/ui/StreamingJobProgressListener.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1813bbd9/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
--
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
index c086df4..61f852a 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
@@ -259,7 +259,7 @@ private[streaming] class StreamingJobProgressListener(ssc: 
StreamingContext)
   // We use an Iterable rather than explicitly converting to a seq so that 
updates
   // will propagate
   val outputOpIdToSparkJobIds: Iterable[OutputOpIdAndSparkJobId] =
-Option(batchTimeToOutputOpIdSparkJobIdPair.get(batchTime).asScala)
+
Option(batchTimeToOutputOpIdSparkJobIdPair.get(batchTime)).map(_.asScala)
   .getOrElse(Seq.empty)
   _batchUIData.outputOpIdSparkJobIdPairs = outputOpIdToSparkJobIds
 }





spark git commit: [SPARK-16828][SQL] remove MaxOf and MinOf

2016-08-01 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master 03d46aafe -> 2eedc00b0


[SPARK-16828][SQL] remove MaxOf and MinOf

## What changes were proposed in this pull request?

These 2 expressions are not needed anymore after we have `Greatest` and 
`Least`. This PR removes them and related tests.
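
For context, a minimal sketch of the user-facing surface that already covers this 
ground, the `greatest`/`least` functions and their SQL counterparts (data is made up):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{greatest, least}

val spark = SparkSession.builder().master("local[*]").appName("greatest-least").getOrCreate()
import spark.implicits._

// Per-row maximum and minimum across columns, the role MaxOf/MinOf played internally.
val df = Seq((1, 5), (7, 3)).toDF("a", "b")
df.select(greatest($"a", $"b").as("max_ab"), least($"a", $"b").as("min_ab")).show()

// SQL equivalents.
spark.sql("SELECT greatest(1, 2, 3) AS g, least(1, 2, 3) AS l").show()
```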

## How was this patch tested?

N/A

Author: Wenchen Fan 

Closes #14434 from cloud-fan/minor1.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2eedc00b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2eedc00b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2eedc00b

Branch: refs/heads/master
Commit: 2eedc00b04ef8ca771ff64c4f834c25f835f5f44
Parents: 03d46aa
Author: Wenchen Fan 
Authored: Mon Aug 1 17:54:41 2016 -0700
Committer: Yin Huai 
Committed: Mon Aug 1 17:54:41 2016 -0700

--
 .../sql/catalyst/expressions/arithmetic.scala   | 110 ---
 .../sql/catalyst/optimizer/Optimizer.scala  |   4 -
 .../analysis/ExpressionTypeCheckingSuite.scala  |   7 --
 .../expressions/ArithmeticExpressionSuite.scala |  54 -
 4 files changed, 175 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2eedc00b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 7ff8795..77d40a5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -361,116 +361,6 @@ case class Remainder(left: Expression, right: Expression)
   }
 }
 
-case class MaxOf(left: Expression, right: Expression)
-  extends BinaryArithmetic with NonSQLExpression {
-
-  // TODO: Remove MaxOf and MinOf, and replace its usage with Greatest and 
Least.
-
-  override def inputType: AbstractDataType = TypeCollection.Ordered
-
-  override def nullable: Boolean = left.nullable && right.nullable
-
-  private lazy val ordering = TypeUtils.getInterpretedOrdering(dataType)
-
-  override def eval(input: InternalRow): Any = {
-val input1 = left.eval(input)
-val input2 = right.eval(input)
-if (input1 == null) {
-  input2
-} else if (input2 == null) {
-  input1
-} else {
-  if (ordering.compare(input1, input2) < 0) {
-input2
-  } else {
-input1
-  }
-}
-  }
-
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-val eval1 = left.genCode(ctx)
-val eval2 = right.genCode(ctx)
-val compCode = ctx.genComp(dataType, eval1.value, eval2.value)
-
-ev.copy(code = eval1.code + eval2.code + s"""
-  boolean ${ev.isNull} = false;
-  ${ctx.javaType(left.dataType)} ${ev.value} =
-${ctx.defaultValue(left.dataType)};
-
-  if (${eval1.isNull}) {
-${ev.isNull} = ${eval2.isNull};
-${ev.value} = ${eval2.value};
-  } else if (${eval2.isNull}) {
-${ev.isNull} = ${eval1.isNull};
-${ev.value} = ${eval1.value};
-  } else {
-if ($compCode > 0) {
-  ${ev.value} = ${eval1.value};
-} else {
-  ${ev.value} = ${eval2.value};
-}
-  }""")
-  }
-
-  override def symbol: String = "max"
-}
-
-case class MinOf(left: Expression, right: Expression)
-  extends BinaryArithmetic with NonSQLExpression {
-
-  // TODO: Remove MaxOf and MinOf, and replace its usage with Greatest and 
Least.
-
-  override def inputType: AbstractDataType = TypeCollection.Ordered
-
-  override def nullable: Boolean = left.nullable && right.nullable
-
-  private lazy val ordering = TypeUtils.getInterpretedOrdering(dataType)
-
-  override def eval(input: InternalRow): Any = {
-val input1 = left.eval(input)
-val input2 = right.eval(input)
-if (input1 == null) {
-  input2
-} else if (input2 == null) {
-  input1
-} else {
-  if (ordering.compare(input1, input2) < 0) {
-input1
-  } else {
-input2
-  }
-}
-  }
-
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-val eval1 = left.genCode(ctx)
-val eval2 = right.genCode(ctx)
-val compCode = ctx.genComp(dataType, eval1.value, eval2.value)
-
-ev.copy(code = eval1.code + eval2.code + s"""
-  boolean ${ev.isNull} = false;
-  ${ctx.javaType(left.dataType)} ${ev.value} =
-${ctx.defaultValue(left.dataType)};
-
-  if (${eval1.isNull}) {
-${ev.isNull} = ${eval2.isNull};
-${ev.value} = ${eval2.value};
-  } else if (${eval2.isNull}) {
-${ev.isNull} = ${eval1.isNull};
-${ev.valu

spark git commit: [SPARK-16818] Exchange reuse incorrectly reuses scans over different sets of partitions

2016-08-01 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 1813bbd9b -> 5fbf5f93e


[SPARK-16818] Exchange reuse incorrectly reuses scans over different sets of 
partitions

https://github.com/apache/spark/pull/14425 rebased for branch-2.0

Author: Eric Liang 

Closes #14427 from ericl/spark-16818-br-2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fbf5f93
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fbf5f93
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fbf5f93

Branch: refs/heads/branch-2.0
Commit: 5fbf5f93ee5aa4d1aca0fa0c8fb769a085dd7b93
Parents: 1813bbd
Author: Eric Liang 
Authored: Mon Aug 1 19:46:20 2016 -0700
Committer: Reynold Xin 
Committed: Mon Aug 1 19:46:20 2016 -0700

--
 .../datasources/FileSourceStrategy.scala|  2 ++
 .../datasources/FileSourceStrategySuite.scala   | 35 +++-
 2 files changed, 36 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5fbf5f93/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index 13a86bf..8af9562 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -202,7 +202,9 @@ private[sql] object FileSourceStrategy extends Strategy 
with Logging {
   partitions
   }
 
+  // These metadata values make scan plans uniquely identifiable for 
equality checking.
   val meta = Map(
+"PartitionFilters" -> partitionKeyFilters.mkString("[", ", ", "]"),
 "Format" -> files.fileFormat.toString,
 "ReadSchema" -> prunedDataSchema.simpleString,
 PUSHED_FILTERS -> pushedDownFilters.mkString("[", ", ", "]"),

http://git-wip-us.apache.org/repos/asf/spark/blob/5fbf5f93/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
index 8d8a18f..7a24f21 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionSet, 
PredicateHelper}
 import org.apache.spark.sql.catalyst.util
-import org.apache.spark.sql.execution.DataSourceScanExec
+import org.apache.spark.sql.execution.{DataSourceScanExec, SparkPlan}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.sources._
@@ -407,6 +407,39 @@ class FileSourceStrategySuite extends QueryTest with 
SharedSQLContext with Predi
 }
   }
 
+  test("[SPARK-16818] partition pruned file scans implement sameResult 
correctly") {
+withTempPath { path =>
+  val tempDir = path.getCanonicalPath
+  spark.range(100)
+.selectExpr("id", "id as b")
+.write
+.partitionBy("id")
+.parquet(tempDir)
+  val df = spark.read.parquet(tempDir)
+  def getPlan(df: DataFrame): SparkPlan = {
+df.queryExecution.executedPlan
+  }
+  assert(getPlan(df.where("id = 2")).sameResult(getPlan(df.where("id = 
2"
+  assert(!getPlan(df.where("id = 2")).sameResult(getPlan(df.where("id = 
3"
+}
+  }
+
+  test("[SPARK-16818] exchange reuse respects differences in partition 
pruning") {
+spark.conf.set("spark.sql.exchange.reuse", true)
+withTempPath { path =>
+  val tempDir = path.getCanonicalPath
+  spark.range(10)
+.selectExpr("id % 2 as a", "id % 3 as b", "id as c")
+.write
+.partitionBy("a")
+.parquet(tempDir)
+  val df = spark.read.parquet(tempDir)
+  val df1 = df.where("a = 0").groupBy("b").agg("c" -> "sum")
+  val df2 = df.where("a = 1").groupBy("b").agg("c" -> "sum")
+  checkAnswer(df1.join(df2, "b"), Row(0, 6, 12) :: Row(1, 4, 8) :: Row(2, 
10, 5) :: Nil)
+}
+  }
+
   // Helpers for checking the arguments passed to the FileFormat.
 
   protected val checkPartitionSchema =



spark git commit: [SPARK-16793][SQL] Set the temporary warehouse path to sc's conf in TestHive.

2016-08-01 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 2eedc00b0 -> 5184df06b


[SPARK-16793][SQL] Set the temporary warehouse path to sc's conf in TestHive.

## What changes were proposed in this pull request?

With SPARK-15034, we can use the value of spark.sql.warehouse.dir to set the 
warehouse location. In TestHive, we can now simply set the temporary warehouse 
path in sc's conf, and thus the "warehousePath" param can be removed.
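
For reference, a minimal sketch of the user-facing analogue enabled by SPARK-15034 
(a throwaway local session; the temp directory is illustrative):

```scala
import java.nio.file.Files

import org.apache.spark.sql.SparkSession

// Point the warehouse at a temporary directory purely through configuration,
// the same mechanism TestHive now uses instead of a separate warehousePath param.
val warehouseDir = Files.createTempDirectory("warehouse").toUri.getPath
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("warehouse-conf-sketch")
  .config("spark.sql.warehouse.dir", warehouseDir)
  .getOrCreate()

println(spark.conf.get("spark.sql.warehouse.dir"))
```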

## How was this patch tested?

Existing test suites.

Author: jiangxingbo 

Closes #14401 from jiangxb1987/warehousePath.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5184df06
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5184df06
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5184df06

Branch: refs/heads/master
Commit: 5184df06b347f86776c8ac87415b8002a5942a35
Parents: 2eedc00
Author: jiangxingbo 
Authored: Mon Aug 1 23:08:06 2016 -0700
Committer: Reynold Xin 
Committed: Mon Aug 1 23:08:06 2016 -0700

--
 .../apache/spark/sql/hive/test/TestHive.scala   | 42 +---
 .../sql/hive/execution/HiveQuerySuite.scala |  2 +-
 .../spark/sql/sources/BucketedReadSuite.scala   |  2 +-
 3 files changed, 21 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 7f89204..fbacd59 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -54,6 +54,7 @@ object TestHive
 .set("spark.sql.test", "")
 .set("spark.sql.hive.metastore.barrierPrefixes",
   "org.apache.spark.sql.hive.execution.PairSerDe")
+.set("spark.sql.warehouse.dir", 
TestHiveContext.makeWarehouseDir().toURI.getPath)
 // SPARK-8910
 .set("spark.ui.enabled", "false")))
 
@@ -111,7 +112,6 @@ class TestHiveContext(
  * A [[SparkSession]] used in [[TestHiveContext]].
  *
  * @param sc SparkContext
- * @param warehousePath path to the Hive warehouse directory
  * @param scratchDirPath scratch directory used by Hive's metastore client
  * @param metastoreTemporaryConf configuration options for Hive's metastore
  * @param existingSharedState optional [[TestHiveSharedState]]
@@ -120,23 +120,15 @@ class TestHiveContext(
  */
 private[hive] class TestHiveSparkSession(
 @transient private val sc: SparkContext,
-val warehousePath: File,
 scratchDirPath: File,
 metastoreTemporaryConf: Map[String, String],
 @transient private val existingSharedState: Option[TestHiveSharedState],
 private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
-  // TODO: We need to set the temp warehouse path to sc's conf.
-  // Right now, In SparkSession, we will set the warehouse path to the default 
one
-  // instead of the temp one. Then, we override the setting in 
TestHiveSharedState
-  // when we creating metadataHive. This flow is not easy to follow and can 
introduce
-  // confusion when a developer is debugging an issue. We need to refactor 
this part
-  // to just set the temp warehouse path in sc's conf.
   def this(sc: SparkContext, loadTestTables: Boolean) {
 this(
   sc,
-  Utils.createTempDir(namePrefix = "warehouse"),
   TestHiveContext.makeScratchDir(),
   HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false),
   None,
@@ -151,16 +143,16 @@ private[hive] class TestHiveSparkSession(
   @transient
   override lazy val sharedState: TestHiveSharedState = {
 existingSharedState.getOrElse(
-  new TestHiveSharedState(sc, warehousePath, scratchDirPath, 
metastoreTemporaryConf))
+  new TestHiveSharedState(sc, scratchDirPath, metastoreTemporaryConf))
   }
 
   @transient
   override lazy val sessionState: TestHiveSessionState =
-new TestHiveSessionState(self, warehousePath)
+new TestHiveSessionState(self)
 
   override def newSession(): TestHiveSparkSession = {
 new TestHiveSparkSession(
-  sc, warehousePath, scratchDirPath, metastoreTemporaryConf, 
Some(sharedState), loadTestTables)
+  sc, scratchDirPath, metastoreTemporaryConf, Some(sharedState), 
loadTestTables)
   }
 
   private var cacheTables: Boolean = false
@@ -199,6 +191,12 @@ private[hive] class TestHiveSparkSession(
 new 
File(Thread.currentThread().getContextClassLoader.getResource(path).getFile)
   }
 
+  def getWarehousePath(): String = {
+val tempConf = new SQLConf
+sc.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) }