[ https://issues.apache.org/jira/browse/SPARK-30996?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-30996: ------------------------------------ Assignee: Apache Spark > Able to write parquet file subsequently unable to read > ------------------------------------------------------ > > Key: SPARK-30996 > URL: https://issues.apache.org/jira/browse/SPARK-30996 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.4.4 > Environment: spark-shell 2.4.4 > Reporter: Nathan Grand > Assignee: Apache Spark > Priority: Major > > {code:java} > case class StructKey(i: Int) > case class StructValue(l: Long) > case class Outer(m: Map[StructKey, StructValue]) > val data = Seq(Seq(Outer(Map(StructKey(0) -> StructValue(1L))))) > val ds = data.toDS > ds.write.mode("overwrite").parquet("ds.parquet") > val in = spark.read.parquet("ds.parquet") > ds.printSchema > root > |-- value: array (nullable = true) > | |-- element: struct (containsNull = true) > | | |-- m: map (nullable = true) > | | | |-- key: struct > | | | | |-- i: integer (nullable = false) > | | | |-- value: struct (valueContainsNull = true) > | | | | |-- l: long (nullable = false) > ds.show(false) > +----------------+ > |value | > +----------------+ > |[[[[0] -> [1]]]]| > +----------------+ > in.printSchema > root > |-- value: array (nullable = true) > | |-- element: struct (containsNull = true) > | | |-- m: map (nullable = true) > | | | |-- key: struct > | | | | |-- i: integer (nullable = true) > | | | |-- value: struct (valueContainsNull = true) > | | | | |-- l: long (nullable = true) > in.show(false) > Caused by: org.apache.spark.sql.AnalysisException: Map key type is expected > to be a primitive type, but found: required group key { > required int32 i; > }; > at > org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter$.checkConversionRequirement(ParquetSchemaConverter.scala:583) > at > org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter$$anonfun$convertGroupField$2.apply(ParquetSchemaConverter.scala:228) > at > 
org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter$$anonfun$convertGroupField$2.apply(ParquetSchemaConverter.scala:183) > at scala.Option.fold(Option.scala:158) > at > org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.convertGroupField(ParquetSchemaConverter.scala:183) > at > org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.convertField(ParquetSchemaConverter.scala:90) > {code} > You should not be able to write something that you subsequently cannot read: if > the Parquet being written is invalid, the write should fail with an error; otherwise, > the subsequent read should succeed. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org