[ https://issues.apache.org/jira/browse/SPARK-44988?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17773215#comment-17773215 ]
Miles Granger commented on SPARK-44988:
---------------------------------------

[~fanjia] That "worked" for me, but then I of course need to cast the resulting bigint to a timestamp, which I feel is error prone (a minimal sketch of that cast follows the quoted issue below). It would be nice if Spark supported timestamp[ns], though.

> Parquet INT64 (TIMESTAMP(NANOS,false)) throwing Illegal Parquet type
> --------------------------------------------------------------------
>
>                 Key: SPARK-44988
>                 URL: https://issues.apache.org/jira/browse/SPARK-44988
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.4.0, 3.4.1
>            Reporter: Flavio Odas
>            Priority: Critical
>
> This bug seems similar to https://issues.apache.org/jira/browse/SPARK-40819, except that it is a problem with INT64 (TIMESTAMP(NANOS,false)) instead of INT64 (TIMESTAMP(NANOS,true)).
> The error happens whenever I try to read the file:
> {code:java}
> org.apache.spark.sql.AnalysisException: Illegal Parquet type: INT64 (TIMESTAMP(NANOS,false)).
>   at org.apache.spark.sql.errors.QueryCompilationErrors$.illegalParquetTypeError(QueryCompilationErrors.scala:1762)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.illegalType$1(ParquetSchemaConverter.scala:206)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.$anonfun$convertPrimitiveField$2(ParquetSchemaConverter.scala:283)
>   at scala.Option.getOrElse(Option.scala:189)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.convertPrimitiveField(ParquetSchemaConverter.scala:224)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.convertField(ParquetSchemaConverter.scala:187)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.$anonfun$convertInternal$3(ParquetSchemaConverter.scala:147)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.$anonfun$convertInternal$3$adapted(ParquetSchemaConverter.scala:117)
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
>   at scala.collection.immutable.Range.foreach(Range.scala:158)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:286)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
>   at scala.collection.AbstractTraversable.map(Traversable.scala:108)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.convertInternal(ParquetSchemaConverter.scala:117)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter.convert(ParquetSchemaConverter.scala:87)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$.$anonfun$readSchemaFromFooter$2(ParquetFileFormat.scala:493)
>   at scala.Option.getOrElse(Option.scala:189)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$.readSchemaFromFooter(ParquetFileFormat.scala:493)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$.$anonfun$mergeSchemasInParallel$2(ParquetFileFormat.scala:473)
>   at scala.collection.immutable.Stream.map(Stream.scala:418)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$.$anonfun$mergeSchemasInParallel$1(ParquetFileFormat.scala:473)
>   at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$.$anonfun$mergeSchemasInParallel$1$adapted(ParquetFileFormat.scala:464)
>   at org.apache.spark.sql.execution.datasources.SchemaMergeUtils$.$anonfun$mergeSchemasInParallel$2(SchemaMergeUtils.scala:79)
>   at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2(RDD.scala:853)
>   at org.apache.spark.rdd.RDD.$anonfun$mapPartitions$2$adapted(RDD.scala:853)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
>   at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
>   at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
>   at org.apache.spark.scheduler.Task.run(Task.scala:139)
>   at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
> {code}
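For reference, a minimal sketch of the cast workaround mentioned in the comment above. It assumes the nanosecond column can first be surfaced as a plain bigint of nanoseconds since the epoch, e.g. via the {{spark.sql.legacy.parquet.nanosAsLong}} legacy flag introduced by SPARK-40819, where available; the path and the column name "event_time" are illustrative, not from the original report.

{code:scala}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

val spark = SparkSession.builder().getOrCreate()

// Assumption: with this legacy flag (added by SPARK-40819), Spark reads the
// INT64 (TIMESTAMP(NANOS,*)) column as a plain LongType (bigint) instead of
// throwing "Illegal Parquet type".
spark.conf.set("spark.sql.legacy.parquet.nanosAsLong", "true")

val df = spark.read.parquet("/path/to/nanos.parquet") // hypothetical path

// Numeric-to-timestamp casts in Spark interpret the value as seconds since
// the epoch, so divide the nanosecond count by 1e9 first. Anything finer
// than microseconds is silently dropped, which is the lossy, easy-to-get-wrong
// step the comment calls error prone.
val withTs = df.withColumn(
  "event_time_ts",
  (col("event_time") / 1000000000L).cast("timestamp")
)
{code}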