[ https://issues.apache.org/jira/browse/HUDI-2675?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sagar Sumit updated HUDI-2675: ------------------------------ Priority: Blocker (was: Major) > Not an Avro data file > --------------------- > > Key: HUDI-2675 > URL: https://issues.apache.org/jira/browse/HUDI-2675 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core > Reporter: 董可伦 > Assignee: 董可伦 > Priority: Blocker > Labels: pull-request-available > Fix For: 0.10.0 > > > There are three places where I have encountered this exception, and I'm not sure > if there are other places where this exception might be thrown. I'm sure that > the exception during archiving of commits is because the .rollback file size is 0. > I'm not sure whether the reason for the clean exception is that the file size > is 0, because I didn't save the failed clean file at that time. However, this > exception will also be thrown when the size of the `.clean` and > `.clean.requested` files is 0. I think we can filter out files of size 0 when > we get clean and archive instants. In addition, it is unclear what caused > the .rollback file size to be 0. Is it due to high concurrency, or abnormal > exit of the program due to network reasons? > 1. 
> {code:java} > o.a.hudi.table.HoodieTimelineArchiveLog Failed to archive commits, .commit > file: 20210927181216.rollback > java.io.IOException: Not an Avro data file > at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) > at > org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) > at > org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) > at > org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) > at > org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) > at > org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:439) > at > org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) > at > org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) > at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:427) > at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:331) > at > org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) > at > org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) > at > org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) > at > org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) > at > 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748){code} > 2. > {code:java} > org.apache.hudi.exception.HoodieIOException: Failed to schedule clean > operation > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:95) > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:107) > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.execute(BaseCleanPlanActionExecutor.java:129) > at > org.apache.hudi.table.HoodieJavaCopyOnWriteTable.scheduleCleaning(HoodieJavaCopyOnWriteTable.java:182) > at > org.apache.hudi.client.AbstractHoodieWriteClient.scheduleTableServiceInternal(AbstractHoodieWriteClient.java:961) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:653) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672) > at > org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505) > at > org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440) > at > org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) > at > org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) > at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:405) > at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:335) > at > org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) > at > 
org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) > at > org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) > at > org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.io.IOException: Not an Avro data file > at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeHoodieCleanMetadata(TimelineMetadataUtils.java:152) > at > org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsForCleanByCommits(CleanPlanner.java:150) > at > org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsToClean(CleanPlanner.java:126) > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:73) > ... 24 more{code} > 3. 
> {code:java} > o.a.h.t.a.clean.BaseCleanActionExecutor Failed to perform previous clean > operation, instant: [==>20211011143809__clean__REQUESTED] > org.apache.hudi.exception.HoodieIOException: Not an Avro data file > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:87) > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.lambda$execute$0(BaseCleanActionExecutor.java:137) > at java.util.ArrayList.forEach(ArrayList.java:1257) > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.execute(BaseCleanActionExecutor.java:134) > at > org.apache.hudi.table.HoodieJavaCopyOnWriteTable.clean(HoodieJavaCopyOnWriteTable.java:188) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:660) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672) > at > org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505) > at > org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440) > at > org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) > at > org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) > at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:401) > at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:305) > at > org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) > at > org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) > at > org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) > at > org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) > at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.io.IOException: Not an Avro data file > at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) > at > org.apache.hudi.common.util.CleanerUtils.getCleanerPlan(CleanerUtils.java:106) > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:84) > ... 24 common frames omitted{code} > > > > -- This message was sent by Atlassian Jira (v8.20.1#820001)