Github user mengxr commented on a diff in the pull request: https://github.com/apache/spark/pull/20929#discussion_r180293096 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala --- @@ -624,6 +624,42 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("SPARK-23772 Ignore column of all null values or empty array during JSON schema inference") { --- End diff -- We need more test coverage. I have a similar internal implementation that tests the following cases (ignore the actual test, just look at the example records): ```scala test("null") { assert(removeNullRecursively("null") === "null") } test("empty string") { assert(removeNullRecursively("\"\"") === "\"\"") } test("empty object") { assert(removeNullRecursively("{}") === "null") } test("object with all null values") { val json = """{"a":null,"b":null, "c":null}""" assert(removeNullRecursively(json) === "null") } test("object with some null fields") { val json = """{"a":null,"b":"c","d":null,"e":"f"}""" val expected = """{"b":"c","e":"f"}""" assert(removeNullRecursively(json) === expected) } test("object with some nested null values") { val json = """{"a":{},"b":{"c":null},"d":{"c":"e"},"f":{"c":null,"g":"h"}}""" val expected = """{"d":{"c":"e"},"f":{"g":"h"}}""" assert(removeNullRecursively(json) === expected) } test("array with all null elements") { val json = """[null,null,{},{"a":null}]""" val expected = "null" assert(removeNullRecursively(json) === expected) } test("array with some null elements") { // TODO: is it an issue if we convert empty object to null in an array? val json = """[null,"a",null,{},"b"]""" val expected = """[null,"a",null,null,"b"]""" assert(removeNullRecursively(json) === expected) } ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org