wzx140 commented on code in PR #7003: URL: https://github.com/apache/hudi/pull/7003#discussion_r1027312754
########## hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetReader.java: ########## @@ -105,8 +105,11 @@ public long getTotalRecords() { } private ClosableIterator<IndexedRecord> getIndexedRecordIteratorInternal(Schema schema, Option<Schema> requestedSchema) throws IOException { - AvroReadSupport.setAvroReadSchema(conf, schema); - if (requestedSchema.isPresent()) { + if (!requestedSchema.isPresent()) { + AvroReadSupport.setAvroReadSchema(conf, schema); + } else { + // Make record schema the same as requestedSchema(reader schema) + AvroReadSupport.setAvroReadSchema(conf, requestedSchema.get()); Review Comment: This fixes TestMergeIntoLogOnlyTable. Compaction is triggered at the end of "merge into". It seems that the schemas are not compatible when using writeSchema + readerSchema to read a parquet block into Avro: ``` java.lang.ArrayIndexOutOfBoundsException: 2 at org.apache.avro.io.parsing.Symbol$Alternative.getSymbol(Symbol.java:424) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.io.ResolvingDecoder.doAction(ResolvingDecoder.java:290) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.io.parsing.Parser.advance(Parser.java:88) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.io.ResolvingDecoder.readIndex(ResolvingDecoder.java:267) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175) ~[avro-1.8.2.jar:1.8.2] at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153) ~[avro-1.8.2.jar:1.8.2] at 
org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:145) ~[avro-1.8.2.jar:1.8.2] at org.apache.hudi.avro.HoodieAvroUtils.bytesToAvro(HoodieAvroUtils.java:170) ~[classes/:?] at org.apache.hudi.avro.HoodieAvroUtils.bytesToAvro(HoodieAvroUtils.java:160) ~[classes/:?] at org.apache.hudi.common.model.OverwriteWithLatestAvroPayload.getInsertValue(OverwriteWithLatestAvroPayload.java:75) ~[classes/:?] at org.apache.hudi.common.model.HoodieRecordPayload.getInsertValue(HoodieRecordPayload.java:118) ~[classes/:?] at org.apache.hudi.common.model.HoodieAvroRecord.isDelete(HoodieAvroRecord.java:148) ~[classes/:?] at org.apache.hudi.io.HoodieCreateHandle.doWrite(HoodieCreateHandle.java:135) ~[classes/:?] at org.apache.hudi.io.HoodieWriteHandle.write(HoodieWriteHandle.java:205) ~[classes/:?] at org.apache.hudi.io.HoodieCreateHandle.write(HoodieCreateHandle.java:189) ~[classes/:?] at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.handleInsert(HoodieSparkCopyOnWriteTable.java:262) ~[classes/:?] at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:231) ~[classes/:?] at org.apache.hudi.table.action.compact.HoodieCompactor.lambda$compact$57154431$1(HoodieCompactor.java:137) ~[classes/:?] at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction$1.apply(JavaPairRDD.scala:1040) ~[spark-core_2.11-2.4.4.jar:2.4.4] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410) ~[scala-library-2.11.12.jar:?] at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435) ~[scala-library-2.11.12.jar:?] at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441) ~[scala-library-2.11.12.jar:?] 
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.rdd.RDD.iterator(RDD.scala:286) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.scheduler.Task.run(Task.scala:123) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) ~[spark-core_2.11-2.4.4.jar:2.4.4] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) ~[spark-core_2.11-2.4.4.jar:2.4.4] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_211] at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_211] at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_211] ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org