Repository: spark Updated Branches: refs/heads/master 7dd01d9c0 -> a234cc614
[SPARK-14874][SQL][STREAMING] Remove the obsolete Batch representation ## What changes were proposed in this pull request? The `Batch` class, which had been used to indicate progress in a stream, was abandoned by [[SPARK-13985][SQL] Deterministic batches with ids](https://github.com/apache/spark/commit/caea15214571d9b12dcf1553e5c1cc8b83a8ba5b) and then became useless. This patch: - removes the `Batch` class - ~~does some related renaming~~ (update: this has been reverted) - fixes some related comments ## How was this patch tested? N/A Author: Liwei Lin <lwl...@gmail.com> Closes #12638 from lw-lin/remove-batch. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a234cc61 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a234cc61 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a234cc61 Branch: refs/heads/master Commit: a234cc61465bbefafd9e69c1cabe9aaaf968a91f Parents: 7dd01d9 Author: Liwei Lin <lwl...@gmail.com> Authored: Wed Apr 27 10:25:33 2016 -0700 Committer: Michael Armbrust <mich...@databricks.com> Committed: Wed Apr 27 10:25:33 2016 -0700 ---------------------------------------------------------------------- .../spark/sql/execution/streaming/Batch.scala | 26 -------------------- .../execution/streaming/FileStreamSource.scala | 2 +- .../spark/sql/execution/streaming/Sink.scala | 2 +- .../spark/sql/execution/streaming/Source.scala | 2 +- .../spark/sql/execution/streaming/memory.scala | 2 +- 5 files changed, 4 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a234cc61/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Batch.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Batch.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Batch.scala deleted file mode 100644 index 1f25eb8..0000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Batch.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.streaming - -import org.apache.spark.sql.DataFrame - -/** - * Used to pass a batch of data through a streaming query execution along with an indication - * of progress in the stream. - */ -class Batch(val end: Offset, val data: DataFrame) http://git-wip-us.apache.org/repos/asf/spark/blob/a234cc61/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index 681adde..8e66538 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -88,7 +88,7 @@ class FileStreamSource( } /** - * Returns the next batch of data that is available after `start`, if any is available. + * Returns the data that is between the offsets (`start`, `end`]. */ override def getBatch(start: Option[Offset], end: Offset): DataFrame = { val startId = start.map(_.asInstanceOf[LongOffset].offset).getOrElse(-1L) http://git-wip-us.apache.org/repos/asf/spark/blob/a234cc61/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala index 25015d5..e641e09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.DataFrame trait Sink { /** - * Adds a batch of data to this sink. The data for a given `batchId` is deterministic and if + * Adds a batch of data to this sink. The data for a given `batchId` is deterministic and if * this method is called more than once with the same batchId (which will happen in the case of * failures), then `data` should only be added once. */ http://git-wip-us.apache.org/repos/asf/spark/blob/a234cc61/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala index 1d2f7a8..14450c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala @@ -34,7 +34,7 @@ trait Source { def getOffset: Option[Offset] /** - * Returns the data that is between the offsets (`start`, `end`]. When `start` is `None` then + * Returns the data that is between the offsets (`start`, `end`]. When `start` is `None` then * the batch should begin with the first available record. This method must always return the * same data for a particular `start` and `end` pair. */ http://git-wip-us.apache.org/repos/asf/spark/blob/a234cc61/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index 0d2a6dd..a34927f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -91,7 +91,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) } /** - * Returns the next batch of data that is available after `start`, if any is available. + * Returns the data that is between the offsets (`start`, `end`]. */ override def getBatch(start: Option[Offset], end: Offset): DataFrame = { val startOrdinal = --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org