[ https://issues.apache.org/jira/browse/SPARK-16698?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15391217#comment-15391217 ]
Hyukjin Kwon commented on SPARK-16698: -------------------------------------- FYI, this does not happen when the data is read from a JSON RDD. Here is self-contained code that reproduces the issue. This does not work (in `JsonSuite.scala`): {code} test("SPARK-16698 - json parsing regression - "." in keys") { withTempPath { path => val json =""" {"a.b":"data"}""" spark.sparkContext .parallelize(json :: Nil) .saveAsTextFile(path.getAbsolutePath) spark.read.json(path.getAbsolutePath).collect() } } {code} This works: {code} test("SPARK-16698 - json parsing regression - "." in keys") { withTempPath { path => val json =""" {"a.b":"data"}""" val rdd = spark.sparkContext .parallelize(json :: Nil) spark.read.json(rdd).collect() } } {code} > json parsing regression - "." in keys > ------------------------------------- > > Key: SPARK-16698 > URL: https://issues.apache.org/jira/browse/SPARK-16698 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.0.0 > Reporter: TobiasP > > The commit 83775bc78e183791f75a99cdfbcd68a67ca0d472 "\[SPARK-14158]\[SQL] > implement buildReader for json data source" breaks parsing of json files with > "." in keys. > E.g. 
the test input for spark-solr > https://github.com/lucidworks/spark-solr/blob/master/src/test/resources/test-data/events.json > {noformat} > scala> > sqlContext.read.json("src/test/resources/test-data/events.json").collectAsList > org.apache.spark.sql.AnalysisException: Unable to resolve params.title_s > given [_version_, count_l, doc_id_s, flag_s, id, params.title_s, > params.url_s, session_id_s, timestamp_tdt, type_s, tz_timestamp_txt, > user_id_s]; > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1$$anonfun$apply$5.apply(LogicalPlan.scala:131) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1$$anonfun$apply$5.apply(LogicalPlan.scala:131) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1.apply(LogicalPlan.scala:130) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1.apply(LogicalPlan.scala:126) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at org.apache.spark.sql.types.StructType.foreach(StructType.scala:94) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at org.apache.spark.sql.types.StructType.map(StructType.scala:94) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolve(LogicalPlan.scala:126) > at > org.apache.spark.sql.execution.datasources.FileSourceStrategy$.apply(FileSourceStrategy.scala:80) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at 
scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54) > at > org.apache.spark.sql.execution.SparkStrategies$SpecialLimits$.apply(SparkStrategies.scala:53) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:396) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:52) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:50) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:57) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:57) > at org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2321) > at org.apache.spark.sql.Dataset.collectAsList(Dataset.scala:2040) > ... 49 elided > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org