Github user tdas commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15285#discussion_r83752553
  
    --- Diff: core/src/test/scala/org/apache/spark/util/UtilsSuite.scala ---
    @@ -274,65 +276,102 @@ class UtilsSuite extends SparkFunSuite with 
ResetSystemProperties with Logging {
         assert(str(10 * hour + 59 * minute + 59 * second + 999) === "11" + sep 
+ "00 h")
       }
     
    -  test("reading offset bytes of a file") {
    +  def getSuffix(isCompressed: Boolean): String = {
    +    if (isCompressed) {
    +      ".gz"
    +    } else {
    +      ""
    +    }
    +  }
    +
    +  def writeLogFile(path: String, content: Array[Byte]): Unit = {
    +    val outputStream = if (path.endsWith(".gz")) {
    +      new GZIPOutputStream(new FileOutputStream(path))
    +    } else {
    +      new FileOutputStream(path)
    +    }
    +    IOUtils.write(content, outputStream)
    +    outputStream.close()
    +    content.size
    +  }
    +
    +  def testOffsetBytes(isCompressed: Boolean): Unit = {
         val tmpDir2 = Utils.createTempDir()
    -    val f1Path = tmpDir2 + "/f1"
    -    val f1 = new FileOutputStream(f1Path)
    -    
f1.write("1\n2\n3\n4\n5\n6\n7\n8\n9\n".getBytes(StandardCharsets.UTF_8))
    -    f1.close()
    +    val suffix = getSuffix(isCompressed)
    +    val f1Path = tmpDir2 + "/f1" + suffix
    +    writeLogFile(f1Path, 
"1\n2\n3\n4\n5\n6\n7\n8\n9\n".getBytes(StandardCharsets.UTF_8))
    +    val f1Length = Utils.getFileLength(new File(f1Path))
     
         // Read first few bytes
    -    assert(Utils.offsetBytes(f1Path, 0, 5) === "1\n2\n3")
    +    assert(Utils.offsetBytes(f1Path, f1Length, 0, 5) === "1\n2\n3")
     
         // Read some middle bytes
    -    assert(Utils.offsetBytes(f1Path, 4, 11) === "3\n4\n5\n6")
    +    assert(Utils.offsetBytes(f1Path, f1Length, 4, 11) === "3\n4\n5\n6")
     
         // Read last few bytes
    -    assert(Utils.offsetBytes(f1Path, 12, 18) === "7\n8\n9\n")
    +    assert(Utils.offsetBytes(f1Path, f1Length, 12, 18) === "7\n8\n9\n")
     
         // Read some nonexistent bytes in the beginning
    -    assert(Utils.offsetBytes(f1Path, -5, 5) === "1\n2\n3")
    +    assert(Utils.offsetBytes(f1Path, f1Length, -5, 5) === "1\n2\n3")
     
         // Read some nonexistent bytes at the end
    -    assert(Utils.offsetBytes(f1Path, 12, 22) === "7\n8\n9\n")
    +    assert(Utils.offsetBytes(f1Path, f1Length, 12, 22) === "7\n8\n9\n")
     
         // Read some nonexistent bytes on both ends
    -    assert(Utils.offsetBytes(f1Path, -3, 25) === 
"1\n2\n3\n4\n5\n6\n7\n8\n9\n")
    +    assert(Utils.offsetBytes(f1Path, f1Length, -3, 25) === 
"1\n2\n3\n4\n5\n6\n7\n8\n9\n")
     
         Utils.deleteRecursively(tmpDir2)
       }
     
    -  test("reading offset bytes across multiple files") {
    +  test("reading offset bytes of a file") {
    +    testOffsetBytes(isCompressed = false)
    +  }
    +
    +  test("reading offset bytes of a file (compressed)") {
    +    testOffsetBytes(isCompressed = true)
    +  }
    +
    +  def testOffsetBytesMultipleFiles(isCompressed: Boolean): Unit = {
         val tmpDir = Utils.createTempDir()
    -    val files = (1 to 3).map(i => new File(tmpDir, i.toString))
    -    Files.write("0123456789", files(0), StandardCharsets.UTF_8)
    -    Files.write("abcdefghij", files(1), StandardCharsets.UTF_8)
    -    Files.write("ABCDEFGHIJ", files(2), StandardCharsets.UTF_8)
    +    val suffix = getSuffix(isCompressed)
    +    val files = (1 to 3).map(i => new File(tmpDir, i.toString + suffix))
    +    writeLogFile(files(0).getAbsolutePath, 
"0123456789".getBytes(StandardCharsets.UTF_8))
    +    writeLogFile(files(1).getAbsolutePath, 
"abcdefghij".getBytes(StandardCharsets.UTF_8))
    +    writeLogFile(files(2).getAbsolutePath, 
"ABCDEFGHIJ".getBytes(StandardCharsets.UTF_8))
    --- End diff --
    
    could you test mixed compressed and uncompressed files. i think that case 
arises when we compressed rolled files, and active file. 
    
    just having another file(3) which is always uncompressed should be fine.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to