[ https://issues.apache.org/jira/browse/SPARK-10113?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-10113: ------------------------------------ Assignee: Apache Spark > Support for unsigned Parquet logical types > ------------------------------------------ > > Key: SPARK-10113 > URL: https://issues.apache.org/jira/browse/SPARK-10113 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 1.5.0 > Reporter: Jordan Thomas > Assignee: Apache Spark > > Add support for unsigned Parquet logical types UINT_16, UINT_32 and UINT_64. > {code} > org.apache.spark.sql.AnalysisException: Illegal Parquet type: INT64 (UINT_64); > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.illegalType$1(CatalystSchemaConverter.scala:130) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertPrimitiveField(CatalystSchemaConverter.scala:169) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:115) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200) > at scala.Option.fold(Option.scala:158) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertGroupField(CatalystSchemaConverter.scala:200) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:116) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convert(CatalystSchemaConverter.scala:91) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.parquet.ParquetRelation$.readSchemaFromFooter(ParquetRelation.scala:734) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:714) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:713) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:713) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:692) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) > at org.apache.spark.scheduler.Task.run(Task.scala:88) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > 15/08/19 07:10:26 WARN TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, > localhost): org.apache.spark.sql.AnalysisException: Illegal Parquet type: > INT64 (UINT_64); > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.illegalType$1(CatalystSchemaConverter.scala:130) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertPrimitiveField(CatalystSchemaConverter.scala:169) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:115) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200) > at scala.Option.fold(Option.scala:158) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertGroupField(CatalystSchemaConverter.scala:200) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:116) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94) > at > org.apache.spark.sql.parquet.CatalystSchemaConverter.convert(CatalystSchemaConverter.scala:91) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.parquet.ParquetRelation$.readSchemaFromFooter(ParquetRelation.scala:734) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:714) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:713) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245) > at scala.collection.Iterator$class.foreach(Iterator.scala:742) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1194) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:245) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:713) > at > org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:692) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) > at org.apache.spark.scheduler.Task.run(Task.scala:88) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org