[ https://issues.apache.org/jira/browse/HUDI-2675?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
董可伦 updated HUDI-2675: ---------------------- Description: There are three places where I have encountered this exception,I'm not sure if there are other places where this exception might be thrown.I'm sure that the exception of archive commit is because the .rollback file size is 0. I'm not sure whether the reason for the clean exception is that the file size is 0, because I didn't save the failed clean file at that time. However, this exception will also be thrown when the size of `. clean` and `. clean.requested` files is 0. I think we can filter out files of size 0 when we get clean and archive instances? in addition, it is unclear what caused the . rollback file size to be 0. Is it due to high concurrency or abnormal exit of the program due to network reasons? ```scala o.a.hudi.table.HoodieTimelineArchiveLog Failed to archive commits, .commit file: 20210927181216.rollback java.io.IOException: Not an Avro data file at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) at org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:439) at org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) at org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:427) at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:331) at org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) at org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) at org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ``` org.apache.hudi.exception.HoodieIOException: Failed to schedule clean operation at org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:95) at org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:107) at org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.execute(BaseCleanPlanActionExecutor.java:129) at org.apache.hudi.table.HoodieJavaCopyOnWriteTable.scheduleCleaning(HoodieJavaCopyOnWriteTable.java:182) at org.apache.hudi.client.AbstractHoodieWriteClient.scheduleTableServiceInternal(AbstractHoodieWriteClient.java:961) at org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:653) at org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641) at org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672) at org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505) at org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440) at org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) at org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:405) at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:335) at org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) at org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) at org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.io.IOException: Not an Avro data file at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeHoodieCleanMetadata(TimelineMetadataUtils.java:152) at org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsForCleanByCommits(CleanPlanner.java:150) at org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsToClean(CleanPlanner.java:126) at org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:73) ... 24 more ``` ```scala o.a.h.t.a.clean.BaseCleanActionExecutor Failed to perform previous clean operation, instant: [==>20211011143809__clean__REQUESTED] org.apache.hudi.exception.HoodieIOException: Not an Avro data file at org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:87) at org.apache.hudi.table.action.clean.BaseCleanActionExecutor.lambda$execute$0(BaseCleanActionExecutor.java:137) at java.util.ArrayList.forEach(ArrayList.java:1257) at org.apache.hudi.table.action.clean.BaseCleanActionExecutor.execute(BaseCleanActionExecutor.java:134) at org.apache.hudi.table.HoodieJavaCopyOnWriteTable.clean(HoodieJavaCopyOnWriteTable.java:188) at org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:660) at org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641) at org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672) at org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505) at org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440) at org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) at org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:401) at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:305) at org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) at org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) at org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.io.IOException: Not an Avro data file at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) at org.apache.hudi.common.util.CleanerUtils.getCleanerPlan(CleanerUtils.java:106) at org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:84) ... 24 common frames omitted ``` > Not an Avro data file > --------------------- > > Key: HUDI-2675 > URL: https://issues.apache.org/jira/browse/HUDI-2675 > Project: Apache Hudi > Issue Type: Bug > Components: Common Core > Reporter: 董可伦 > Assignee: 董可伦 > Priority: Major > Fix For: 0.10.0 > > > There are three places where I have encountered this exception,I'm not sure > if there are other places where this exception might be thrown.I'm sure that > the exception of archive commit is because the .rollback file size is 0. > I'm not sure whether the reason for the clean exception is that the file size > is 0, because I didn't save the failed clean file at that time. However, this > exception will also be thrown when the size of `. clean` and `. > clean.requested` files is 0. I think we can filter out files of size 0 when > we get clean and archive instances? in addition, it is unclear what caused > the . rollback file size to be 0. Is it due to high concurrency or abnormal > exit of the program due to network reasons? > ```scala > o.a.hudi.table.HoodieTimelineArchiveLog Failed to archive commits, .commit > file: 20210927181216.rollback > java.io.IOException: Not an Avro data file > at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) > at > org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) > at > org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) > at > org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) > at > org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) > at > org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:439) > at > org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) > at > org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) > at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:427) > at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:331) > at > org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) > at > org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) > at > org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) > at > org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > ``` > ``` > org.apache.hudi.exception.HoodieIOException: Failed to schedule clean > operation > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:95) > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:107) > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.execute(BaseCleanPlanActionExecutor.java:129) > at > org.apache.hudi.table.HoodieJavaCopyOnWriteTable.scheduleCleaning(HoodieJavaCopyOnWriteTable.java:182) > at > org.apache.hudi.client.AbstractHoodieWriteClient.scheduleTableServiceInternal(AbstractHoodieWriteClient.java:961) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:653) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672) > at > org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505) > at > org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440) > at > org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) > at > org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) > at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:405) > at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:335) > at > org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) > at > org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) > at > org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) > at > org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.io.IOException: Not an Avro data file > at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeHoodieCleanMetadata(TimelineMetadataUtils.java:152) > at > org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsForCleanByCommits(CleanPlanner.java:150) > at > org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsToClean(CleanPlanner.java:126) > at > org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:73) > ... 24 more > ``` > > ```scala > o.a.h.t.a.clean.BaseCleanActionExecutor Failed to perform previous clean > operation, instant: [==>20211011143809__clean__REQUESTED] > org.apache.hudi.exception.HoodieIOException: Not an Avro data file > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:87) > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.lambda$execute$0(BaseCleanActionExecutor.java:137) > at java.util.ArrayList.forEach(ArrayList.java:1257) > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.execute(BaseCleanActionExecutor.java:134) > at > org.apache.hudi.table.HoodieJavaCopyOnWriteTable.clean(HoodieJavaCopyOnWriteTable.java:188) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:660) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641) > at > org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672) > at > org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505) > at > org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440) > at > org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187) > at > org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129) > at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:401) > at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:305) > at > org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) > at > org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166) > at > org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208) > at > org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.io.IOException: Not an Avro data file > at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50) > at > org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) > at > org.apache.hudi.common.util.CleanerUtils.getCleanerPlan(CleanerUtils.java:106) > at > org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:84) > ... 24 common frames omitted > ``` > > > -- This message was sent by Atlassian Jira (v8.3.4#803005)