spark git commit: [SPARK-17346][SQL][TEST-MAVEN] Generate the sql test jar to fix the maven build

2016-10-05 Thread tdas
Repository: spark
Updated Branches:
  refs/heads/master 9293734d3 -> b678e465a


[SPARK-17346][SQL][TEST-MAVEN] Generate the sql test jar to fix the maven build

## What changes were proposed in this pull request?

Generate the sql test jar to fix the maven build
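
For context: downstream modules (such as the Kafka source below) compile their tests against helper classes that ship in the sql test jar, which is why the jar must be produced during the Maven build. A minimal sketch of such a downstream test, assuming the test-jar dependencies added in this patch (the suite name is illustrative):

```scala
// Sketch only: these helper classes come from the spark-sql / spark-catalyst /
// spark-core test jars that this patch wires into the Maven build.
import org.apache.spark.sql.streaming.StreamTest
import org.apache.spark.sql.test.SharedSQLContext

class MyDownstreamSuite extends StreamTest with SharedSQLContext {
  test("uses helpers from the sql test jar") {
    // `spark` is provided by SharedSQLContext.
    assert(spark.range(3).count() === 3)
  }
}
```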

## How was this patch tested?

Jenkins

Author: Shixiong Zhu 

Closes #15368 from zsxwing/sql-test-jar.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b678e465
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b678e465
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b678e465

Branch: refs/heads/master
Commit: b678e465afa417780b54db0fbbaa311621311f15
Parents: 9293734
Author: Shixiong Zhu 
Authored: Wed Oct 5 18:11:31 2016 -0700
Committer: Tathagata Das 
Committed: Wed Oct 5 18:11:31 2016 -0700

--
 external/kafka-0-10-sql/pom.xml | 14 ++
 .../spark/sql/kafka010/KafkaSourceSuite.scala   |  1 +
 sql/core/pom.xml| 27 
 3 files changed, 42 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b678e465/external/kafka-0-10-sql/pom.xml
--
diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml
index b96445a..ebff5fd 100644
--- a/external/kafka-0-10-sql/pom.xml
+++ b/external/kafka-0-10-sql/pom.xml
@@ -43,6 +43,20 @@
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
       <artifactId>spark-sql_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>

http://git-wip-us.apache.org/repos/asf/spark/blob/b678e465/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
--
diff --git 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
index 64bf503..6c03070 100644
--- 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
+++ 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
@@ -151,6 +151,7 @@ class KafkaSourceSuite extends KafkaSourceTest {
 val mapped = kafka.map(kv => kv._2.toInt + 1)
 
 testStream(mapped)(
+  makeSureGetOffsetCalled,
   StopStream
 )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/b678e465/sql/core/pom.xml
--
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 84de1d4..7da7715 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -132,6 +132,33 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>test-jar-on-test-compile</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-17643] Remove comparable requirement from Offset (backport for branch-2.0)

2016-10-05 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 3b6463a79 -> 1c2dff1ee


[SPARK-17643] Remove comparable requirement from Offset (backport for 
branch-2.0)

## What changes were proposed in this pull request?

Backport 
https://github.com/apache/spark/commit/988c71457354b0a443471f501cef544a85b1a76a 
to branch-2.0
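
In practical terms, after this change a streaming `Offset` implementation no longer has to provide an ordering; a hedged sketch of what a minimal custom offset can look like once the `compareTo` requirement is gone (the class name is illustrative):

```scala
import org.apache.spark.sql.execution.streaming.Offset

// No compareTo is required any more; StreamExecution relies on offset equality.
case class MyCustomOffset(epoch: Long) extends Offset
```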

## How was this patch tested?

Jenkins

Author: Michael Armbrust 

Closes #15362 from zsxwing/SPARK-17643-2.0.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c2dff1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c2dff1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c2dff1e

Branch: refs/heads/branch-2.0
Commit: 1c2dff1eeeb045f3f5c3c1423ba07371b03965d7
Parents: 3b6463a
Author: Michael Armbrust 
Authored: Wed Oct 5 16:48:43 2016 -0700
Committer: Shixiong Zhu 
Committed: Wed Oct 5 16:48:43 2016 -0700

--
 .../execution/streaming/CompositeOffset.scala   | 30 ---
 .../sql/execution/streaming/LongOffset.scala|  6 ---
 .../spark/sql/execution/streaming/Offset.scala  | 19 ++
 .../execution/streaming/StreamExecution.scala   |  9 +++--
 .../spark/sql/streaming/OffsetSuite.scala   | 39 
 5 files changed, 9 insertions(+), 94 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1c2dff1e/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompositeOffset.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompositeOffset.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompositeOffset.scala
index 729c846..ebc6ee8 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompositeOffset.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompositeOffset.scala
@@ -24,36 +24,6 @@ package org.apache.spark.sql.execution.streaming
  */
 case class CompositeOffset(offsets: Seq[Option[Offset]]) extends Offset {
   /**
-   * Returns a negative integer, zero, or a positive integer as this object is 
less than, equal to,
-   * or greater than the specified object.
-   */
-  override def compareTo(other: Offset): Int = other match {
-case otherComposite: CompositeOffset if otherComposite.offsets.size == 
offsets.size =>
-  val comparisons = offsets.zip(otherComposite.offsets).map {
-case (Some(a), Some(b)) => a compareTo b
-case (None, None) => 0
-case (None, _) => -1
-case (_, None) => 1
-  }
-  val nonZeroSigns = comparisons.map(sign).filter(_ != 0).toSet
-  nonZeroSigns.size match {
-case 0 => 0   // if both empty or only 0s
-case 1 => nonZeroSigns.head   // if there are only (0s and 1s) or 
(0s and -1s)
-case _ => // there are both 1s and -1s
-  throw new IllegalArgumentException(
-s"Invalid comparison between non-linear histories: $this <=> 
$other")
-  }
-case _ =>
-  throw new IllegalArgumentException(s"Cannot compare $this <=> $other")
-  }
-
-  private def sign(num: Int): Int = num match {
-case i if i < 0 => -1
-case i if i == 0 => 0
-case i if i > 0 => 1
-  }
-
-  /**
* Unpacks an offset into [[StreamProgress]] by associating each offset with 
the order list of
* sources.
*

http://git-wip-us.apache.org/repos/asf/spark/blob/1c2dff1e/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/LongOffset.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/LongOffset.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/LongOffset.scala
index bb17640..c5e8827 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/LongOffset.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/LongOffset.scala
@@ -22,12 +22,6 @@ package org.apache.spark.sql.execution.streaming
  */
 case class LongOffset(offset: Long) extends Offset {
 
-  override def compareTo(other: Offset): Int = other match {
-case l: LongOffset => offset.compareTo(l.offset)
-case _ =>
-  throw new IllegalArgumentException(s"Invalid comparison of $getClass 
with ${other.getClass}")
-  }
-
   def +(increment: Long): LongOffset = new LongOffset(offset + increment)
   def -(decrement: Long): LongOffset = new LongOffset(offset - decrement)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1c2dff1e/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala

[2/2] spark git commit: [SPARK-17346][SQL] Add Kafka source for Structured Streaming

2016-10-05 Thread tdas
[SPARK-17346][SQL] Add Kafka source for Structured Streaming

## What changes were proposed in this pull request?

This PR adds a new project, `external/kafka-0-10-sql`, for a Structured Streaming Kafka source.

It's based on the design doc: 
https://docs.google.com/document/d/19t2rWe51x7tq2e5AOfrsM9qb8_m7BRuv9fel9i0PqR8/edit?usp=sharing

tdas did most of the work, and part of it was inspired by koeninger's work.

### Introduction

The Kafka source is a Structured Streaming data source that polls data from Kafka. The schema of the returned data is as follows:

Column | Type
------ | ----
key | binary
value | binary
topic | string
partition | int
offset | long
timestamp | long
timestampType | int
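
For illustration, a minimal sketch (not part of this commit's examples) of projecting that schema after loading the source; `key` and `value` arrive as binary, so they are typically cast before use (server address and topic name are placeholders):

```Scala
val kafkaDF = spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host:port")
  .option("subscribe", "topic1")
  .load()
  // Cast the binary key/value; the remaining columns follow the table above.
  .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)",
    "topic", "partition", "offset", "timestamp", "timestampType")
```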

The source can handle topic deletion. However, the user should make sure no Spark job is processing the data when a topic is deleted.

### Configuration

The user can use `DataStreamReader.option` to set the following configurations.

Kafka Source's options | value | default | meaning
---------------------- | ----- | ------- | -------
startingOffset | ["earliest", "latest"] | "latest" | The start point when a query is started, either "earliest" (from the earliest offsets) or "latest" (from the latest offsets). Note: this only applies when a new streaming query is started; resuming always picks up from where the query left off.
failOnDataLoss | [true, false] | true | Whether to fail the query when it is possible that data has been lost (e.g., topics are deleted or offsets are out of range). This may be a false alarm; you can disable it when it does not work as expected.
subscribe | A comma-separated list of topics | (none) | The topic list to subscribe to. Only one of "subscribe" and "subscribePattern" can be specified for the Kafka source.
subscribePattern | Java regex string | (none) | The pattern used to subscribe to topics. Only one of "subscribe" and "subscribePattern" can be specified for the Kafka source.
kafka.consumer.poll.timeoutMs | long | 512 | The timeout in milliseconds to poll data from Kafka in executors.
fetchOffset.numRetries | int | 3 | Number of times to retry before giving up fetching the latest Kafka offsets.
fetchOffset.retryIntervalMs | long | 10 | Milliseconds to wait before retrying to fetch Kafka offsets.

Kafka's own configurations can be set via `DataStreamReader.option` with the `kafka.` prefix, e.g., `stream.option("kafka.bootstrap.servers", "host:port")`.
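
For example, a sketch combining several of the options above with a `kafka.`-prefixed setting (all values are placeholders):

```Scala
val df = spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
  .option("subscribePattern", "topic.*")
  .option("startingOffset", "earliest")
  .option("failOnDataLoss", "false")
  .load()
```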

### Usage

* Subscribe to 1 topic
```Scala
spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host:port")
  .option("subscribe", "topic1")
  .load()
```

* Subscribe to multiple topics
```Scala
spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host:port")
  .option("subscribe", "topic1,topic2")
  .load()
```

* Subscribe to a pattern
```Scala
spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host:port")
  .option("subscribePattern", "topic.*")
  .load()
```

## How was this patch tested?

The new unit tests.

Author: Shixiong Zhu 
Author: Tathagata Das 
Author: Shixiong Zhu 
Author: cody koeninger 

Closes #15102 from zsxwing/kafka-source.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9293734d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9293734d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9293734d

Branch: refs/heads/master
Commit: 9293734d35eb3d6e4fd4ebb86f54dd5d3a35e6db
Parents: 5fd54b9
Author: Shixiong Zhu 
Authored: Wed Oct 5 16:45:45 2016 -0700
Committer: Tathagata Das 
Committed: Wed Oct 5 16:45:45 2016 -0700

--
 .../spark/util/UninterruptibleThread.scala  |   7 -
 dev/run-tests.py|   2 +-
 dev/sparktestsupport/modules.py |  12 +
 docs/structured-streaming-kafka-integration.md  | 239 +++
 docs/structured-streaming-programming-guide.md  |   7 +-
 external/kafka-0-10-sql/pom.xml |  82 
 apache.spark.sql.sources.DataSourceRegister |   1 +
 .../sql/kafka010/CachedKafkaConsumer.scala  | 152 +++
 .../apache/spark/sql/kafka010/KafkaSource.scala | 399 +
 .../spark/sql/kafka010/KafkaSourceOffset.scala  |  54 +++
 .../sql/kafka010/KafkaSourceProvider.scala  | 282 
 .../spark/sql/kafka010/KafkaSourceRDD.scala | 148 +++
 .../apache/spark/sql/kafka010/package-info.java |  21 +
 .../src/test/resources/log4j.properties |  28 ++
 .../sql/kafka010/KafkaSourceOffsetSuite.scala   |  39 ++
 .../spark/sql/kafka010/KafkaSourceSuite.scala   | 424 +++
 .../spark/sql/kafka010/KafkaTestUtils.scala | 339 +++
 pom.xml |   1 +
 

[1/2] spark git commit: [SPARK-17346][SQL] Add Kafka source for Structured Streaming

2016-10-05 Thread tdas
Repository: spark
Updated Branches:
  refs/heads/master 5fd54b994 -> 9293734d3


http://git-wip-us.apache.org/repos/asf/spark/blob/9293734d/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
--
diff --git 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
new file mode 100644
index 000..64bf503
--- /dev/null
+++ 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala
@@ -0,0 +1,424 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.kafka010
+
+import java.util.concurrent.atomic.AtomicInteger
+
+import scala.util.Random
+
+import org.apache.kafka.clients.producer.RecordMetadata
+import org.scalatest.BeforeAndAfter
+import org.scalatest.time.SpanSugar._
+
+import org.apache.spark.sql.execution.streaming._
+import org.apache.spark.sql.streaming.StreamTest
+import org.apache.spark.sql.test.SharedSQLContext
+
+
+abstract class KafkaSourceTest extends StreamTest with SharedSQLContext {
+
+  protected var testUtils: KafkaTestUtils = _
+
+  override val streamingTimeout = 30.seconds
+
+  override def beforeAll(): Unit = {
+super.beforeAll()
+testUtils = new KafkaTestUtils
+testUtils.setup()
+  }
+
+  override def afterAll(): Unit = {
+if (testUtils != null) {
+  testUtils.teardown()
+  testUtils = null
+  super.afterAll()
+}
+  }
+
+  protected def makeSureGetOffsetCalled = AssertOnQuery { q =>
+// Because KafkaSource's initialPartitionOffsets is set lazily, we need to make sure
+// its "getOffset" is called before pushing any data. Otherwise, because of the race condition,
+// we don't know which data should be fetched when `startingOffset` is latest.
+q.processAllAvailable()
+true
+  }
+
+  /**
+   * Add data to Kafka.
+   *
+   * `topicAction` can be used to run actions for each topic before inserting 
data.
+   */
+  case class AddKafkaData(topics: Set[String], data: Int*)
+(implicit ensureDataInMultiplePartition: Boolean = false,
+  concurrent: Boolean = false,
+  message: String = "",
+  topicAction: (String, Option[Int]) => Unit = (_, _) => {}) extends 
AddData {
+
+override def addData(query: Option[StreamExecution]): (Source, Offset) = {
+  if (query.get.isActive) {
+// Make sure no Spark job is running when deleting a topic
+query.get.processAllAvailable()
+  }
+
+  val existingTopics = testUtils.getAllTopicsAndPartitionSize().toMap
+  val newTopics = topics.diff(existingTopics.keySet)
+  for (newTopic <- newTopics) {
+topicAction(newTopic, None)
+  }
+  for (existingTopicPartitions <- existingTopics) {
+topicAction(existingTopicPartitions._1, 
Some(existingTopicPartitions._2))
+  }
+
+  // Read all topics again in case some topics are deleted.
+  val allTopics = testUtils.getAllTopicsAndPartitionSize().toMap.keys
+  require(
+query.nonEmpty,
+"Cannot add data when there is no query for finding the active kafka 
source")
+
+  val sources = query.get.logicalPlan.collect {
+case StreamingExecutionRelation(source, _) if 
source.isInstanceOf[KafkaSource] =>
+  source.asInstanceOf[KafkaSource]
+  }
+  if (sources.isEmpty) {
+throw new Exception(
+  "Could not find Kafka source in the StreamExecution logical plan to 
add data to")
+  } else if (sources.size > 1) {
+throw new Exception(
+  "Could not select the Kafka source in the StreamExecution logical 
plan as there" +
+"are multiple Kafka sources:\n\t" + sources.mkString("\n\t"))
+  }
+  val kafkaSource = sources.head
+  val topic = topics.toSeq(Random.nextInt(topics.size))
+  val sentMetadata = testUtils.sendMessages(topic, data.map { _.toString 
}.toArray)
+
+  def metadataToStr(m: (String, RecordMetadata)): String = {
+s"Sent ${m._1} to partition ${m._2.partition()}, offset 
${m._2.offset()}"
+  }
+  // Verify 

spark git commit: [SPARK-17758][SQL] Last returns wrong result in case of empty partition

2016-10-05 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master 221b418b1 -> 5fd54b994


[SPARK-17758][SQL] Last returns wrong result in case of empty partition

## What changes were proposed in this pull request?
The result of the `Last` function can be wrong when the last partition 
processed is empty. It can return `null` instead of the expected value. For 
example, this can happen when we process partitions in the following order:
```
- Partition 1 [Row1, Row2]
- Partition 2 [Row3]
- Partition 3 []
```
In this case the `Last` function will currently return null instead of the value of `Row3`.

This PR fixes this by adding a `valueSet` flag to the `Last` function.
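
For reference, a minimal sketch (assuming a local `spark` session, e.g. in spark-shell) of the kind of layout that could hit the old code path; since the failure depends on which partition buffer is merged last, this is illustrative rather than a deterministic reproduction:

```scala
import org.apache.spark.sql.functions.last
import spark.implicits._

// A few rows spread over many partitions guarantees some empty partitions.
val df = Seq(1, 2, 3).toDF("a").repartition(8)

// Before the fix, an empty buffer merged last could overwrite a real value with
// null; with the valueSet flag, a buffer that has seen a value is preferred.
df.agg(last($"a")).show()
```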

## How was this patch tested?
We only used end-to-end tests for `DeclarativeAggregateFunction`s. I have added an evaluator for these functions so we can test them in Catalyst, and I have added a `LastTestSuite` to test the `Last` aggregate function.

Author: Herman van Hovell 

Closes #15348 from hvanhovell/SPARK-17758.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fd54b99
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fd54b99
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fd54b99

Branch: refs/heads/master
Commit: 5fd54b994e2078dbf0794932b4e0ffa9a9eda0c3
Parents: 221b418
Author: Herman van Hovell 
Authored: Wed Oct 5 16:05:30 2016 -0700
Committer: Yin Huai 
Committed: Wed Oct 5 16:05:30 2016 -0700

--
 .../catalyst/expressions/aggregate/Last.scala   |  27 ++---
 .../DeclarativeAggregateEvaluator.scala |  61 +++
 .../expressions/aggregate/LastTestSuite.scala   | 109 +++
 3 files changed, 184 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5fd54b99/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index af88403..8579f72 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -55,34 +55,35 @@ case class Last(child: Expression, ignoreNullsExpr: 
Expression) extends Declarat
 
   private lazy val last = AttributeReference("last", child.dataType)()
 
-  override lazy val aggBufferAttributes: Seq[AttributeReference] = last :: Nil
+  private lazy val valueSet = AttributeReference("valueSet", BooleanType)()
+
+  override lazy val aggBufferAttributes: Seq[AttributeReference] = last :: 
valueSet :: Nil
 
   override lazy val initialValues: Seq[Literal] = Seq(
-/* last = */ Literal.create(null, child.dataType)
+/* last = */ Literal.create(null, child.dataType),
+/* valueSet = */ Literal.create(false, BooleanType)
   )
 
   override lazy val updateExpressions: Seq[Expression] = {
 if (ignoreNulls) {
   Seq(
-/* last = */ If(IsNull(child), last, child)
+/* last = */ If(IsNull(child), last, child),
+/* valueSet = */ Or(valueSet, IsNotNull(child))
   )
 } else {
   Seq(
-/* last = */ child
+/* last = */ child,
+/* valueSet = */ Literal.create(true, BooleanType)
   )
 }
   }
 
   override lazy val mergeExpressions: Seq[Expression] = {
-if (ignoreNulls) {
-  Seq(
-/* last = */ If(IsNull(last.right), last.left, last.right)
-  )
-} else {
-  Seq(
-/* last = */ last.right
-  )
-}
+// Prefer the right hand expression if it has been set.
+Seq(
+  /* last = */ If(valueSet.right, last.right, last.left),
+  /* valueSet = */ Or(valueSet.right, valueSet.left)
+)
   }
 
   override lazy val evaluateExpression: AttributeReference = last

http://git-wip-us.apache.org/repos/asf/spark/blob/5fd54b99/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
new file mode 100644
index 000..614f24d
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * 

spark git commit: [SPARK-17758][SQL] Last returns wrong result in case of empty partition

2016-10-05 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 b8df2e53c -> 3b6463a79


[SPARK-17758][SQL] Last returns wrong result in case of empty partition

## What changes were proposed in this pull request?
The result of the `Last` function can be wrong when the last partition 
processed is empty. It can return `null` instead of the expected value. For 
example, this can happen when we process partitions in the following order:
```
- Partition 1 [Row1, Row2]
- Partition 2 [Row3]
- Partition 3 []
```
In this case the `Last` function will currently return null instead of the value of `Row3`.

This PR fixes this by adding a `valueSet` flag to the `Last` function.

## How was this patch tested?
We only used end-to-end tests for `DeclarativeAggregateFunction`s. I have added an evaluator for these functions so we can test them in Catalyst, and I have added a `LastTestSuite` to test the `Last` aggregate function.

Author: Herman van Hovell 

Closes #15348 from hvanhovell/SPARK-17758.

(cherry picked from commit 5fd54b994e2078dbf0794932b4e0ffa9a9eda0c3)
Signed-off-by: Yin Huai 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3b6463a7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3b6463a7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3b6463a7

Branch: refs/heads/branch-2.0
Commit: 3b6463a794a754d630d69398f009c055664dd905
Parents: b8df2e5
Author: Herman van Hovell 
Authored: Wed Oct 5 16:05:30 2016 -0700
Committer: Yin Huai 
Committed: Wed Oct 5 16:05:47 2016 -0700

--
 .../catalyst/expressions/aggregate/Last.scala   |  27 ++---
 .../DeclarativeAggregateEvaluator.scala |  61 +++
 .../expressions/aggregate/LastTestSuite.scala   | 109 +++
 3 files changed, 184 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3b6463a7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index af88403..8579f72 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -55,34 +55,35 @@ case class Last(child: Expression, ignoreNullsExpr: 
Expression) extends Declarat
 
   private lazy val last = AttributeReference("last", child.dataType)()
 
-  override lazy val aggBufferAttributes: Seq[AttributeReference] = last :: Nil
+  private lazy val valueSet = AttributeReference("valueSet", BooleanType)()
+
+  override lazy val aggBufferAttributes: Seq[AttributeReference] = last :: 
valueSet :: Nil
 
   override lazy val initialValues: Seq[Literal] = Seq(
-/* last = */ Literal.create(null, child.dataType)
+/* last = */ Literal.create(null, child.dataType),
+/* valueSet = */ Literal.create(false, BooleanType)
   )
 
   override lazy val updateExpressions: Seq[Expression] = {
 if (ignoreNulls) {
   Seq(
-/* last = */ If(IsNull(child), last, child)
+/* last = */ If(IsNull(child), last, child),
+/* valueSet = */ Or(valueSet, IsNotNull(child))
   )
 } else {
   Seq(
-/* last = */ child
+/* last = */ child,
+/* valueSet = */ Literal.create(true, BooleanType)
   )
 }
   }
 
   override lazy val mergeExpressions: Seq[Expression] = {
-if (ignoreNulls) {
-  Seq(
-/* last = */ If(IsNull(last.right), last.left, last.right)
-  )
-} else {
-  Seq(
-/* last = */ last.right
-  )
-}
+// Prefer the right hand expression if it has been set.
+Seq(
+  /* last = */ If(valueSet.right, last.right, last.left),
+  /* valueSet = */ Or(valueSet.right, valueSet.left)
+)
   }
 
   override lazy val evaluateExpression: AttributeReference = last

http://git-wip-us.apache.org/repos/asf/spark/blob/3b6463a7/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala
new file mode 100644
index 000..614f24d
--- /dev/null
+++ 

spark git commit: [SPARK-17778][TESTS] Mock SparkContext to reduce memory usage of BlockManagerSuite

2016-10-05 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 a4f7df423 -> b8df2e53c


[SPARK-17778][TESTS] Mock SparkContext to reduce memory usage of 
BlockManagerSuite

## What changes were proposed in this pull request?

Mock SparkContext to reduce memory usage of BlockManagerSuite
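
The change swaps the real `SparkContext` for a Mockito mock; a self-contained sketch of that pattern (imports assumed, mirroring the diff below):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.mockito.Mockito.{mock, when}

val conf = new SparkConf(false)

// Only sc.conf is consulted (to initialize LiveListenerBus), so a mock avoids
// allocating the memory a real local SparkContext would use in every test.
val sc = mock(classOf[SparkContext])
when(sc.conf).thenReturn(conf)
```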

## How was this patch tested?

Jenkins

Author: Shixiong Zhu 

Closes #15350 from zsxwing/SPARK-17778.

(cherry picked from commit 221b418b1c9db7b04c600b6300d18b034a4f444e)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b8df2e53
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b8df2e53
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b8df2e53

Branch: refs/heads/branch-2.0
Commit: b8df2e53c38a30f51c710543c81279a59a9ab4fc
Parents: a4f7df4
Author: Shixiong Zhu 
Authored: Wed Oct 5 14:54:55 2016 -0700
Committer: Shixiong Zhu 
Committed: Wed Oct 5 14:55:29 2016 -0700

--
 .../test/scala/org/apache/spark/storage/BlockManagerSuite.scala | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b8df2e53/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala 
b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index e93eee2..1b3197a 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -107,7 +107,10 @@ class BlockManagerSuite extends SparkFunSuite with 
Matchers with BeforeAndAfterE
 rpcEnv = RpcEnv.create("test", "localhost", 0, conf, securityMgr)
 conf.set("spark.driver.port", rpcEnv.address.port.toString)
 
-sc = new SparkContext("local", "test", conf)
+// Mock SparkContext to reduce the memory usage of tests. It's fine since 
the only reason we
+// need to create a SparkContext is to initialize LiveListenerBus.
+sc = mock(classOf[SparkContext])
+when(sc.conf).thenReturn(conf)
 master = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager",
   new BlockManagerMasterEndpoint(rpcEnv, true, conf,
 new LiveListenerBus(sc))), conf, true)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-17778][TESTS] Mock SparkContext to reduce memory usage of BlockManagerSuite

2016-10-05 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master 9df54f532 -> 221b418b1


[SPARK-17778][TESTS] Mock SparkContext to reduce memory usage of 
BlockManagerSuite

## What changes were proposed in this pull request?

Mock SparkContext to reduce memory usage of BlockManagerSuite

## How was this patch tested?

Jenkins

Author: Shixiong Zhu 

Closes #15350 from zsxwing/SPARK-17778.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/221b418b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/221b418b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/221b418b

Branch: refs/heads/master
Commit: 221b418b1c9db7b04c600b6300d18b034a4f444e
Parents: 9df54f5
Author: Shixiong Zhu 
Authored: Wed Oct 5 14:54:55 2016 -0700
Committer: Shixiong Zhu 
Committed: Wed Oct 5 14:54:55 2016 -0700

--
 .../test/scala/org/apache/spark/storage/BlockManagerSuite.scala | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/221b418b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala 
b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index 1652fcd..705c355 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -107,7 +107,10 @@ class BlockManagerSuite extends SparkFunSuite with 
Matchers with BeforeAndAfterE
 rpcEnv = RpcEnv.create("test", "localhost", 0, conf, securityMgr)
 conf.set("spark.driver.port", rpcEnv.address.port.toString)
 
-sc = new SparkContext("local", "test", conf)
+// Mock SparkContext to reduce the memory usage of tests. It's fine since 
the only reason we
+// need to create a SparkContext is to initialize LiveListenerBus.
+sc = mock(classOf[SparkContext])
+when(sc.conf).thenReturn(conf)
 master = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager",
   new BlockManagerMasterEndpoint(rpcEnv, true, conf,
 new LiveListenerBus(sc))), conf, true)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] Git Push Summary

2016-10-05 Thread rxin
Repository: spark
Updated Tags:  refs/tags/v2.0.1-rc1 [deleted] 00f2e28ed

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] Git Push Summary

2016-10-05 Thread rxin
Repository: spark
Updated Tags:  refs/tags/v2.0.1 [created] 933d2c1ea

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] Git Push Summary

2016-10-05 Thread rxin
Repository: spark
Updated Tags:  refs/tags/v2.0.1-rc4 [deleted] 933d2c1ea

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] Git Push Summary

2016-10-05 Thread rxin
Repository: spark
Updated Tags:  refs/tags/v2.0.1-rc3 [deleted] 9d28cc103

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] Git Push Summary

2016-10-05 Thread rxin
Repository: spark
Updated Tags:  refs/tags/v2.0.1-rc2 [deleted] 04141ad49

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-17239][ML][DOC] Update user guide for multiclass logistic regression

2016-10-05 Thread dbtsai
Repository: spark
Updated Branches:
  refs/heads/master 6a05eb24d -> 9df54f532


[SPARK-17239][ML][DOC] Update user guide for multiclass logistic regression

## What changes were proposed in this pull request?
Updates the user guide to reflect that LogisticRegression now supports multiclass classification. Also adds new examples to show multiclass training.
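
For illustration, a hedged sketch of multinomial training with the new `family` param (the data path is the example dataset that ships with Spark and is assumed to be available):

```scala
import org.apache.spark.ml.classification.LogisticRegression

val training = spark.read.format("libsvm")
  .load("data/mllib/sample_multiclass_classification_data.txt")

val lr = new LogisticRegression()
  .setFamily("multinomial")   // "binomial", "multinomial", or leave unset to infer
  .setMaxIter(10)
  .setRegParam(0.3)
  .setElasticNetParam(0.8)

val model = lr.fit(training)
// Multinomial models expose a coefficient matrix and an intercept vector.
println(s"Coefficients:\n${model.coefficientMatrix}")
println(s"Intercepts: ${model.interceptVector}")
```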

## How was this patch tested?
Ran locally using spark-submit, run-example, and copy/paste from user guide 
into shells. Generated docs and verified correct output.

Author: sethah 

Closes #15349 from sethah/SPARK-17239.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9df54f53
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9df54f53
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9df54f53

Branch: refs/heads/master
Commit: 9df54f5325c2942bb77008ff1810e2fb5f6d848b
Parents: 6a05eb2
Author: sethah 
Authored: Wed Oct 5 18:28:21 2016 +
Committer: DB Tsai 
Committed: Wed Oct 5 18:28:21 2016 +

--
 docs/ml-classification-regression.md| 65 +---
 ...LogisticRegressionWithElasticNetExample.java | 14 +
 ...LogisticRegressionWithElasticNetExample.java | 55 +
 .../ml/logistic_regression_with_elastic_net.py  | 10 +++
 ...lass_logistic_regression_with_elastic_net.py | 48 +++
 ...ogisticRegressionWithElasticNetExample.scala | 13 
 ...ogisticRegressionWithElasticNetExample.scala | 57 +
 7 files changed, 255 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9df54f53/docs/ml-classification-regression.md
--
diff --git a/docs/ml-classification-regression.md 
b/docs/ml-classification-regression.md
index 7c2437e..bb2e404 100644
--- a/docs/ml-classification-regression.md
+++ b/docs/ml-classification-regression.md
@@ -34,17 +34,22 @@ discussing specific classes of algorithms, such as linear 
methods, trees, and en
 
 ## Logistic regression
 
-Logistic regression is a popular method to predict a binary response. It is a 
special case of [Generalized Linear 
models](https://en.wikipedia.org/wiki/Generalized_linear_model) that predicts 
the probability of the outcome.
-For more background and more details about the implementation, refer to the 
documentation of the [logistic regression in 
`spark.mllib`](mllib-linear-methods.html#logistic-regression). 
+Logistic regression is a popular method to predict a categorical response. It 
is a special case of [Generalized Linear 
models](https://en.wikipedia.org/wiki/Generalized_linear_model) that predicts 
the probability of the outcomes.
+In `spark.ml` logistic regression can be used to predict a binary outcome by 
using binomial logistic regression, or it can be used to predict a multiclass 
outcome by using multinomial logistic regression. Use the `family`
+parameter to select between these two algorithms, or leave it unset and Spark 
will infer the correct variant.
 
-  > The current implementation of logistic regression in `spark.ml` only 
supports binary classes. Support for multiclass regression will be added in the 
future.
+  > Multinomial logistic regression can be used for binary classification by 
setting the `family` param to "multinomial". It will produce two sets of 
coefficients and two intercepts.
 
   > When fitting LogisticRegressionModel without intercept on dataset with 
constant nonzero column, Spark MLlib outputs zero coefficients for constant 
nonzero columns. This behavior is the same as R glmnet but different from 
LIBSVM.
 
+### Binomial logistic regression
+
+For more background and more details about the implementation of binomial 
logistic regression, refer to the documentation of [logistic regression in 
`spark.mllib`](mllib-linear-methods.html#logistic-regression). 
+
 **Example**
 
-The following example shows how to train a logistic regression model
-with elastic net regularization. `elasticNetParam` corresponds to
+The following example shows how to train binomial and multinomial logistic 
regression 
+models for binary classification with elastic net regularization. 
`elasticNetParam` corresponds to
 $\alpha$ and `regParam` corresponds to $\lambda$.
 
 
@@ -92,8 +97,8 @@ provides a summary for a
 
[`LogisticRegressionModel`](api/java/org/apache/spark/ml/classification/LogisticRegressionModel.html).
 Currently, only binary classification is supported and the
 summary must be explicitly cast to
-[`BinaryLogisticRegressionTrainingSummary`](api/java/org/apache/spark/ml/classification/BinaryLogisticRegressionTrainingSummary.html).
-This will likely change when multiclass classification is supported.

spark git commit: [SPARK-17328][SQL] Fix NPE with EXPLAIN DESCRIBE TABLE

2016-10-05 Thread hvanhovell
Repository: spark
Updated Branches:
  refs/heads/master 89516c1c4 -> 6a05eb24d


[SPARK-17328][SQL] Fix NPE with EXPLAIN DESCRIBE TABLE

## What changes were proposed in this pull request?

This PR fixes the following NPE scenario in two ways.

**Reported Error Scenario**
```scala
scala> sql("EXPLAIN DESCRIBE TABLE x").show(truncate = false)
INFO SparkSqlParser: Parsing command: EXPLAIN DESCRIBE TABLE x
java.lang.NullPointerException
```

- **DESCRIBE**: Extend `DESCRIBE` syntax to accept `TABLE`.
- **EXPLAIN**: Prevent the NPE when parsing of the target statement fails, e.g., `EXPLAIN DESCRIBE TABLES x` (see the sketch below).
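
A short sketch of exercising both fixed paths in spark-shell (the table name `x` is arbitrary):

```scala
// DESCRIBE now also accepts the optional TABLE keyword, so this parses and
// explains instead of throwing a NullPointerException.
spark.sql("EXPLAIN DESCRIBE TABLE x").show(truncate = false)

// A malformed target statement (extra "S") now surfaces as a ParseException
// rather than an NPE.
try {
  spark.sql("EXPLAIN DESCRIBE TABLES x")
} catch {
  case e: org.apache.spark.sql.catalyst.parser.ParseException => println(e.getMessage)
}
```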

## How was this patch tested?

Pass the Jenkins test with a new test case.

Author: Dongjoon Hyun 

Closes #15357 from dongjoon-hyun/SPARK-17328.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a05eb24
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a05eb24
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a05eb24

Branch: refs/heads/master
Commit: 6a05eb24d043aa93390f353850d56efa6124e063
Parents: 89516c1
Author: Dongjoon Hyun 
Authored: Wed Oct 5 10:52:43 2016 -0700
Committer: Herman van Hovell 
Committed: Wed Oct 5 10:52:43 2016 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  2 +-
 .../spark/sql/execution/SparkSqlParser.scala|  4 +-
 .../resources/sql-tests/inputs/describe.sql |  4 ++
 .../sql-tests/results/describe.sql.out  | 58 +++-
 .../sql/execution/SparkSqlParserSuite.scala | 18 +-
 5 files changed, 68 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6a05eb24/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 87719d9..6a94def 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -136,7 +136,7 @@ statement
 | SHOW CREATE TABLE tableIdentifier
#showCreateTable
 | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName
#describeFunction
 | (DESC | DESCRIBE) DATABASE EXTENDED? identifier  
#describeDatabase
-| (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)?
+| (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
 tableIdentifier partitionSpec? describeColName?
#describeTable
 | REFRESH TABLE tableIdentifier
#refreshTable
 | REFRESH .*?  
#refreshResource

http://git-wip-us.apache.org/repos/asf/spark/blob/6a05eb24/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7f1e23e..085bb9f 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -265,7 +265,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
 }
 
 val statement = plan(ctx.statement)
-if (isExplainableStatement(statement)) {
+if (statement == null) {
+  null  // This is enough since a ParseException will be raised later.
+} else if (isExplainableStatement(statement)) {
   ExplainCommand(statement, extended = ctx.EXTENDED != null, codegen = 
ctx.CODEGEN != null)
 } else {
   ExplainCommand(OneRowRelation)

http://git-wip-us.apache.org/repos/asf/spark/blob/6a05eb24/sql/core/src/test/resources/sql-tests/inputs/describe.sql
--
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql 
b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index 3f0ae90..84503d0 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -2,8 +2,12 @@ CREATE TABLE t (a STRING, b INT) PARTITIONED BY (c STRING, d 
STRING);
 
 ALTER TABLE t ADD PARTITION (c='Us', d=1);
 
+DESCRIBE t;
+
 DESC t;
 
+DESC TABLE t;
+
 -- Ignore these because there exist timestamp results, e.g., `Create Table`.
 -- DESC EXTENDED t;
 -- DESC FORMATTED t;


spark git commit: [SPARK-17258][SQL] Parse scientific decimal literals as decimals

2016-10-05 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master c9fe10d4e -> 89516c1c4


[SPARK-17258][SQL] Parse scientific decimal literals as decimals

## What changes were proposed in this pull request?
Currently Spark SQL parses regular decimal literals (e.g. `10.00`) as decimals 
and scientific decimal literals (e.g. `10.0e10`) as doubles. The difference 
between the two confuses most users. This PR unifies the parsing behavior and 
also parses scientific decimal literals as decimals.

The implications for tests are limited to a single Hive compatibility test.
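
A quick sketch of the behavior change (run in spark-shell):

```scala
spark.sql("SELECT 10.00 AS a, 10.0e10 AS b").schema
  .foreach(f => println(s"${f.name}: ${f.dataType}"))
// Before this patch: a is a DecimalType while b is a DoubleType.
// After this patch: both literals are parsed as decimals, so b is a DecimalType too.
```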

## How was this patch tested?
Updated tests in `ExpressionParserSuite` and `SQLQueryTestSuite`.

Author: Herman van Hovell 

Closes #14828 from hvanhovell/SPARK-17258.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/89516c1c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/89516c1c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/89516c1c

Branch: refs/heads/master
Commit: 89516c1c4a167249b0c82f60a62edb45ede3bd2c
Parents: c9fe10d
Author: Herman van Hovell 
Authored: Tue Oct 4 23:48:26 2016 -0700
Committer: Reynold Xin 
Committed: Tue Oct 4 23:48:26 2016 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  7 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  8 ---
 .../catalyst/parser/ExpressionParserSuite.scala | 24 ++--
 .../resources/sql-tests/inputs/literals.sql |  8 ---
 .../sql-tests/results/arithmetic.sql.out|  2 +-
 .../sql-tests/results/literals.sql.out  | 24 +---
 .../hive/execution/HiveCompatibilitySuite.scala |  4 +++-
 7 files changed, 38 insertions(+), 39 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index c336a0c..87719d9 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -653,7 +653,6 @@ quotedIdentifier
 
 number
 : MINUS? DECIMAL_VALUE#decimalLiteral
-| MINUS? SCIENTIFIC_DECIMAL_VALUE #scientificDecimalLiteral
 | MINUS? INTEGER_VALUE#integerLiteral
 | MINUS? BIGINT_LITERAL   #bigIntLiteral
 | MINUS? SMALLINT_LITERAL #smallIntLiteral
@@ -944,12 +943,8 @@ INTEGER_VALUE
 ;
 
 DECIMAL_VALUE
-: DECIMAL_DIGITS {isValidDecimal()}?
-;
-
-SCIENTIFIC_DECIMAL_VALUE
 : DIGIT+ EXPONENT
-| DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
+| DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
 ;
 
 DOUBLE_LITERAL

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index cd0c70a..bf3f302 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1283,14 +1283,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
   }
 
   /**
-   * Create a double literal for a number denoted in scientific notation.
-   */
-  override def visitScientificDecimalLiteral(
-  ctx: ScientificDecimalLiteralContext): Literal = withOrigin(ctx) {
-Literal(ctx.getText.toDouble)
-  }
-
-  /**
* Create a decimal literal for a regular decimal number.
*/
   override def visitDecimalLiteral(ctx: DecimalLiteralContext): Literal = 
withOrigin(ctx) {

http://git-wip-us.apache.org/repos/asf/spark/blob/89516c1c/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 3718ac5..0fb1138 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -352,6 +352,10 @@ class ExpressionParserSuite extends PlanTest {
   }