[GitHub] spark pull request #20302: [SPARK-23094] Fix invalid character handling in J...

gatorsmile Fri, 19 Jan 2018 09:23:52 -0800

Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20302#discussion_r162682668
  
    --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala ---
    @@ -105,4 +107,36 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
           )
         }
       }
    +
    +  test("invalid json with leading nulls - from file (multiLine=true)") {
    +    import testImplicits._
    +    withTempDir { tempDir =>
    +      val path = tempDir.getAbsolutePath
    +      Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
    +      val expected = s"""$badJson\n{"a":1}\n"""
    +      val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
    +      val df =
    +        spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
    +      checkAnswer(df, Row(null, expected))
    +    }
    +  }
    +
    +  test("invalid json with leading nulls - from file (multiLine=false)") {
    +    import testImplicits._
    +    withTempDir { tempDir =>
    +      val path = tempDir.getAbsolutePath
    +      Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
    +      val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
    +      val df =
    +        spark.read.format(dataSourceName).option("multiLine", false).schema(schema).load(path)
    +      checkAnswer(df, Seq(Row(1, null), Row(null, badJson)))
    +    }
    +  }
    +
    +  test("invalid json with leading nulls - from dataset") {
    --- End diff --
    
    See the PR https://github.com/apache/spark/pull/20331



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #20302: [SPARK-23094] Fix invalid character handling in J...

Reply via email to