This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 63bced9  [SPARK-26745][SQL][TESTS] JsonSuite test case: empty line -> 0 record count
63bced9 is described below

commit 63bced9375ec1ec6ded220d768cd746050861a09
Author: Branden Smith <branden.sm...@publicismedia.com>
AuthorDate: Wed Feb 6 13:55:19 2019 +0800

    [SPARK-26745][SQL][TESTS] JsonSuite test case: empty line -> 0 record count
    
    ## What changes were proposed in this pull request?
    
    This PR consists of the `test` components of #23665 only, minus the
    associated patch from that PR.
    
    It adds a new unit test to `JsonSuite` which verifies that the `count()`
    returned from a `DataFrame` loaded from JSON containing empty lines does not
    include those empty lines in the record count. The test runs `count` prior to
    otherwise reading data from the `DataFrame`, so as to catch future cases where
    a pre-parsing optimization might result in `count` results inconsistent with
    existing behavior.
    
    This PR is intended to be deployed alongside #23667; `master` currently
    causes the test to fail, as described in
    [SPARK-26745](https://issues.apache.org/jira/browse/SPARK-26745).
    
    ## How was this patch tested?
    
    Manual testing, existing `JsonSuite` unit tests.
    
    Closes #23674 from sumitsu/json_emptyline_count_test.
    
    Authored-by: Branden Smith <branden.sm...@publicismedia.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../spark/sql/execution/datasources/json/JsonSuite.scala     | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 49dd9c2..6976177 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2426,6 +2426,18 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
     countForMalformedJSON(0, Seq(""))
   }
 
+  test("SPARK-26745: count() for non-multiline input with empty lines") {
+    withTempPath { tempPath =>
+      val path = tempPath.getCanonicalPath
+      Seq("""{ "a" : 1 }""", "", """     { "a" : 2 }""", " \t ")
+        .toDS()
+        .repartition(1)
+        .write
+        .text(path)
+      assert(spark.read.json(path).count() === 2)
+    }
+  }
+
   test("SPARK-25040: empty strings should be disallowed") {
     def failedOnEmptyString(dataType: DataType): Unit = {
        val df = spark.read.schema(s"a ${dataType.catalogString}")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to