Hi, I received the following error when reading an Avro source with Spark 1.5.0 and the com.databricks.spark.avro reader. In the data source, there is one nested field named "UserActivity.history.activity" and another named "UserActivity.activity". This seems to be the reason for the exception, since the two fields have the same name but sit at different levels of the hierarchy.
Any ideas on how to get around this? The exception occurs immediately when trying to load the data. Thanks, Anders 15/09/26 11:42:41 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, lon4-hadoopslave-a148.lon4.spotify.net): org.apache.avro.AvroRuntimeException: Bad index at com.spotify.analytics.schema.UserActivity.put(UserActivity.java:60) at org.apache.avro.generic.GenericData.setField(GenericData.java:573) at org.apache.avro.generic.GenericData.setField(GenericData.java:590) at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:193) at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:183) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:151) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:155) at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:193) at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:183) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:151) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:142) at org.apache.avro.file.DataFileStream.next(DataFileStream.java:233) at org.apache.avro.mapred.AvroRecordReader.next(AvroRecordReader.java:66) at org.apache.avro.mapred.AvroRecordReader.next(AvroRecordReader.java:32) at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:248) at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:216) at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$class.isEmpty(Iterator.scala:256) at scala.collection.AbstractIterator.isEmpty(Iterator.scala:1157) at 
com.databricks.spark.avro.AvroRelation$$anonfun$buildScan$1$$anonfun$4.apply(AvroRelation.scala:127) at com.databricks.spark.avro.AvroRelation$$anonfun$buildScan$1$$anonfun$4.apply(AvroRelation.scala:126) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:87) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) at org.apache.spark.scheduler.Task.run(Task.scala:88) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745)