[ https://issues.apache.org/jira/browse/SPARK-28986?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
iduanyingjie updated SPARK-28986: --------------------------------- Description: In Spark SQL, when using from_json to parse the JSON string of a field, the defined field information is case-sensitive, such as [a String] in this example; if I want to parse the data with id 2, it won't work. I can no longer define a column of [A String] in the schema, because the schema field names in a DataFrame are not case-sensitive. I feel this is a bit contradictory. Code: {code:java} import org.apache.spark.sql.SparkSession object JsonCaseInsensitive { case class User(id: String, fields: String) val users = Seq(User("1", "{\"a\": \"b\"}"), User("2", "{\"A\": \"B\"}")) def main(args: Array[String]): Unit = { val spark = SparkSession.builder().master("local").appName("JsonCaseInsensitive").getOrCreate() import spark.implicits._ spark.createDataset(users) .selectExpr("id", "from_json(fields, 'a String')") .show() } } {code} Output: {code:java} +---+---------------------+ | id|jsontostructs(fields)| +---+---------------------+ | 1| [b]| | 2| []| +---+---------------------+ {code} was: {code:java} import org.apache.spark.sql.SparkSession object JsonCaseInsensitive { case class User(id: String, fields: String) val users = Seq(User("1", "{\"a\": \"b\"}"), User("2", "{\"A\": \"B\"}")) def main(args: Array[String]): Unit = { val spark = SparkSession.builder().master("local").appName("JsonCaseInsensitive").getOrCreate() import spark.implicits._ spark.createDataset(users) .selectExpr("id", "from_json(fields, 'a String')") .show() } } {code} {code:java} +---+---------------------+ | id|jsontostructs(fields)| +---+---------------------+ | 1| [b]| | 2| []| +---+---------------------+ {code} > from_json cannot handle data with the same name but different case in the > json string. 
> -------------------------------------------------------------------------------------- > > Key: SPARK-28986 > URL: https://issues.apache.org/jira/browse/SPARK-28986 > Project: Spark > Issue Type: New Feature > Components: SQL > Affects Versions: 2.4.4 > Reporter: iduanyingjie > Priority: Major > > In spark sql, when using this from_json to parse the json string of a field, > the defined field information is case-sensitive, such as [a String] in this > example, if I want to parse this data with id 2 It won't work. I can no > longer define a column of [A String] in the schema, because the schema field > name in the DataFrame is not case sensitive, I feel this is a bit > contradictory. > > Code: > {code:java} > import org.apache.spark.sql.SparkSession > object JsonCaseInsensitive { > case class User(id: String, fields: String) > val users = Seq(User("1", "{\"a\": \"b\"}"), User("2", "{\"A\": \"B\"}")) > def main(args: Array[String]): Unit = { > val spark = > SparkSession.builder().master("local").appName("JsonCaseInsensitive").getOrCreate() > import spark.implicits._ > spark.createDataset(users) > .selectExpr("id", "from_json(fields, 'a String')") > .show() > } > } > {code} > Output: > {code:java} > +---+---------------------+ > | id|jsontostructs(fields)| > +---+---------------------+ > | 1| [b]| > | 2| []| > +---+---------------------+ > {code} -- This message was sent by Atlassian Jira (v8.3.2#803003) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org