spark git commit: [SPARK-15601][CORE] CircularBuffer's toString() to print only the contents written if buffer isn't full
Repository: spark Updated Branches: refs/heads/branch-1.6 ea84b3373 -> 714f4d78a [SPARK-15601][CORE] CircularBuffer's toString() to print only the contents written if buffer isn't full 1. The class allocated 4x space than needed as it was using `Int` to store the `Byte` values 2. If CircularBuffer isn't full, currently toString() will print some garbage chars along with the content written as is tries to print the entire array allocated for the buffer. The fix is to keep track of buffer getting full and don't print the tail of the buffer if it isn't full (suggestion by sameeragarwal over https://github.com/apache/spark/pull/12194#discussion_r64495331) 3. Simplified `toString()` Added new test case Author: Tejas PatilCloses #13351 from tejasapatil/circular_buffer. (cherry picked from commit ac38bdc756c25632069e7887a657250fe2fd6d82) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/714f4d78 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/714f4d78 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/714f4d78 Branch: refs/heads/branch-1.6 Commit: 714f4d78a09c7cd0a71a3867418d5262b6a14527 Parents: ea84b33 Author: Tejas Patil Authored: Tue May 31 19:52:22 2016 -0500 Committer: Sean Owen Committed: Tue May 31 19:58:17 2016 -0500 -- .../scala/org/apache/spark/util/Utils.scala | 32 - .../org/apache/spark/util/UtilsSuite.scala | 37 2 files changed, 44 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/714f4d78/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 0bcbf26..36ab3ac 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -22,6 +22,7 @@ import java.lang.management.ManagementFactory import java.net._ import java.nio.ByteBuffer import java.nio.channels.Channels +import java.nio.charset.StandardCharsets import java.util.concurrent._ import java.util.{Locale, Properties, Random, UUID} import javax.net.ssl.HttpsURLConnection @@ -2308,29 +2309,24 @@ private[spark] class RedirectThread( * the toString method. */ private[spark] class CircularBuffer(sizeInBytes: Int = 10240) extends java.io.OutputStream { - var pos: Int = 0 - var buffer = new Array[Int](sizeInBytes) + private var pos: Int = 0 + private var isBufferFull = false + private val buffer = new Array[Byte](sizeInBytes) - def write(i: Int): Unit = { -buffer(pos) = i + def write(input: Int): Unit = { +buffer(pos) = input.toByte pos = (pos + 1) % buffer.length +isBufferFull = isBufferFull || (pos == 0) } override def toString: String = { -val (end, start) = buffer.splitAt(pos) -val input = new java.io.InputStream { - val iterator = (start ++ end).iterator - - def read(): Int = if (iterator.hasNext) iterator.next() else -1 -} -val reader = new BufferedReader(new InputStreamReader(input)) -val stringBuilder = new StringBuilder -var line = reader.readLine() -while (line != null) { - stringBuilder.append(line) - stringBuilder.append("\n") - line = reader.readLine() +if (!isBufferFull) { + return new String(buffer, 0, pos, StandardCharsets.UTF_8) } -stringBuilder.toString() + +val nonCircularBuffer = new Array[Byte](sizeInBytes) +System.arraycopy(buffer, pos, nonCircularBuffer, 0, buffer.length - pos) +System.arraycopy(buffer, 0, nonCircularBuffer, buffer.length - pos, pos) +new String(nonCircularBuffer, StandardCharsets.UTF_8) } } http://git-wip-us.apache.org/repos/asf/spark/blob/714f4d78/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 7de995a..13f85a7 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.util -import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, FileOutputStream} +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, FileOutputStream, PrintStream} import java.lang.{Double => JDouble, Float => JFloat} import java.net.{BindException, ServerSocket, URI} import java.nio.{ByteBuffer, ByteOrder} @@ -679,14 +679,37 @@ class UtilsSuite extends
spark git commit: [SPARK-15601][CORE] CircularBuffer's toString() to print only the contents written if buffer isn't full
Repository: spark Updated Branches: refs/heads/master 04f925ede -> ac38bdc75 [SPARK-15601][CORE] CircularBuffer's toString() to print only the contents written if buffer isn't full ## What changes were proposed in this pull request? 1. The class allocated 4x space than needed as it was using `Int` to store the `Byte` values 2. If CircularBuffer isn't full, currently toString() will print some garbage chars along with the content written as is tries to print the entire array allocated for the buffer. The fix is to keep track of buffer getting full and don't print the tail of the buffer if it isn't full (suggestion by sameeragarwal over https://github.com/apache/spark/pull/12194#discussion_r64495331) 3. Simplified `toString()` ## How was this patch tested? Added new test case Author: Tejas PatilCloses #13351 from tejasapatil/circular_buffer. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac38bdc7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac38bdc7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac38bdc7 Branch: refs/heads/master Commit: ac38bdc756c25632069e7887a657250fe2fd6d82 Parents: 04f925e Author: Tejas Patil Authored: Tue May 31 19:52:22 2016 -0500 Committer: Sean Owen Committed: Tue May 31 19:52:22 2016 -0500 -- .../scala/org/apache/spark/util/Utils.scala | 31 +++- .../org/apache/spark/util/UtilsSuite.scala | 37 2 files changed, 43 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ac38bdc7/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 7e204fa..1a9dbca 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2344,29 +2344,24 @@ private[spark] class RedirectThread( * the toString method. */ private[spark] class CircularBuffer(sizeInBytes: Int = 10240) extends java.io.OutputStream { - var pos: Int = 0 - var buffer = new Array[Int](sizeInBytes) + private var pos: Int = 0 + private var isBufferFull = false + private val buffer = new Array[Byte](sizeInBytes) - def write(i: Int): Unit = { -buffer(pos) = i + def write(input: Int): Unit = { +buffer(pos) = input.toByte pos = (pos + 1) % buffer.length +isBufferFull = isBufferFull || (pos == 0) } override def toString: String = { -val (end, start) = buffer.splitAt(pos) -val input = new java.io.InputStream { - val iterator = (start ++ end).iterator - - def read(): Int = if (iterator.hasNext) iterator.next() else -1 -} -val reader = new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8)) -val stringBuilder = new StringBuilder -var line = reader.readLine() -while (line != null) { - stringBuilder.append(line) - stringBuilder.append("\n") - line = reader.readLine() +if (!isBufferFull) { + return new String(buffer, 0, pos, StandardCharsets.UTF_8) } -stringBuilder.toString() + +val nonCircularBuffer = new Array[Byte](sizeInBytes) +System.arraycopy(buffer, pos, nonCircularBuffer, 0, buffer.length - pos) +System.arraycopy(buffer, 0, nonCircularBuffer, buffer.length - pos, pos) +new String(nonCircularBuffer, StandardCharsets.UTF_8) } } http://git-wip-us.apache.org/repos/asf/spark/blob/ac38bdc7/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 4aa4854..6698749 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.util -import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, FileOutputStream} +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, FileOutputStream, PrintStream} import java.lang.{Double => JDouble, Float => JFloat} import java.net.{BindException, ServerSocket, URI} import java.nio.{ByteBuffer, ByteOrder} @@ -681,14 +681,37 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(!Utils.isInDirectory(nullFile, childFile3)) } - test("circular buffer") { + test("circular buffer: if nothing was written to the buffer, display nothing") { +val buffer = new CircularBuffer(4) +assert(buffer.toString === "") + } + + test("circular