[ 
https://issues.apache.org/jira/browse/HUDI-2675?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

董可伦 updated HUDI-2675:
----------------------
    Description: 
There are three places where I have encountered this exception; I'm not sure if 
there are other places where this exception might be thrown. I'm sure that the 
exception during archive commit occurs because the .rollback file size is 0.

I'm not sure whether the reason for the clean exception is that the file size 
is 0, because I didn't save the failed clean file at that time. However, this 
exception will also be thrown when the size of the `.clean` and 
`.clean.requested` files is 0. I think we can filter out files of size 0 when we 
get clean and archive instances? In addition, it is unclear what caused the 
.rollback file size to be 0. Is it due to high concurrency or abnormal exit of 
the program due to network reasons?

```scala

o.a.hudi.table.HoodieTimelineArchiveLog Failed to archive commits, .commit 
file: 20210927181216.rollback
java.io.IOException: Not an Avro data file
 at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50)
 at 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178)
 at 
org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103)
 at 
org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341)
 at 
org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305)
 at 
org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:439)
 at 
org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187)
 at 
org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129)
 at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:427)
 at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:331)
 at 
org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
 at 
org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
 at 
org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
 at 
org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)

```

```

org.apache.hudi.exception.HoodieIOException: Failed to schedule clean operation
 at 
org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:95)
 at 
org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:107)
 at 
org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.execute(BaseCleanPlanActionExecutor.java:129)
 at 
org.apache.hudi.table.HoodieJavaCopyOnWriteTable.scheduleCleaning(HoodieJavaCopyOnWriteTable.java:182)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.scheduleTableServiceInternal(AbstractHoodieWriteClient.java:961)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:653)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440)
 at 
org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187)
 at 
org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129)
 at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:405)
 at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:335)
 at 
org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
 at 
org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
 at 
org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
 at 
org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: Not an Avro data file
 at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50)
 at 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178)
 at 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeHoodieCleanMetadata(TimelineMetadataUtils.java:152)
 at 
org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsForCleanByCommits(CleanPlanner.java:150)
 at 
org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsToClean(CleanPlanner.java:126)
 at 
org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:73)
 ... 24 more

```

 

```scala

o.a.h.t.a.clean.BaseCleanActionExecutor Failed to perform previous clean 
operation, instant: [==>20211011143809__clean__REQUESTED]
org.apache.hudi.exception.HoodieIOException: Not an Avro data file
 at 
org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:87)
 at 
org.apache.hudi.table.action.clean.BaseCleanActionExecutor.lambda$execute$0(BaseCleanActionExecutor.java:137)
 at java.util.ArrayList.forEach(ArrayList.java:1257)
 at 
org.apache.hudi.table.action.clean.BaseCleanActionExecutor.execute(BaseCleanActionExecutor.java:134)
 at 
org.apache.hudi.table.HoodieJavaCopyOnWriteTable.clean(HoodieJavaCopyOnWriteTable.java:188)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:660)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505)
 at 
org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440)
 at 
org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187)
 at 
org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129)
 at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:401)
 at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:305)
 at 
org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
 at 
org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
 at 
org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
 at 
org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
 at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
 at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: Not an Avro data file
 at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50)
 at 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178)
 at 
org.apache.hudi.common.util.CleanerUtils.getCleanerPlan(CleanerUtils.java:106)
 at 
org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:84)
 ... 24 common frames omitted

```

 

 

 

> Not an Avro data file
> ---------------------
>
>                 Key: HUDI-2675
>                 URL: https://issues.apache.org/jira/browse/HUDI-2675
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: Common Core
>            Reporter: 董可伦
>            Assignee: 董可伦
>            Priority: Major
>             Fix For: 0.10.0
>
>
> There are three places where I have encountered this exception; I'm not sure 
> if there are other places where this exception might be thrown. I'm sure that 
> the exception during archive commit occurs because the .rollback file size is 0.
> I'm not sure whether the reason for the clean exception is that the file size 
> is 0, because I didn't save the failed clean file at that time. However, this 
> exception will also be thrown when the size of the `.clean` and 
> `.clean.requested` files is 0. I think we can filter out files of size 0 when 
> we get clean and archive instances? In addition, it is unclear what caused 
> the .rollback file size to be 0. Is it due to high concurrency or abnormal 
> exit of the program due to network reasons?
> ```scala
> o.a.hudi.table.HoodieTimelineArchiveLog Failed to archive commits, .commit 
> file: 20210927181216.rollback
> java.io.IOException: Not an Avro data file
>  at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50)
>  at 
> org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178)
>  at 
> org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103)
>  at 
> org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341)
>  at 
> org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305)
>  at 
> org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:439)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:427)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:331)
>  at 
> org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
>  at 
> org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
>  at 
> org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
>  at 
> org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> ```
> ```
> org.apache.hudi.exception.HoodieIOException: Failed to schedule clean 
> operation
>  at 
> org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:95)
>  at 
> org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:107)
>  at 
> org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.execute(BaseCleanPlanActionExecutor.java:129)
>  at 
> org.apache.hudi.table.HoodieJavaCopyOnWriteTable.scheduleCleaning(HoodieJavaCopyOnWriteTable.java:182)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.scheduleTableServiceInternal(AbstractHoodieWriteClient.java:961)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:653)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:405)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:335)
>  at 
> org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
>  at 
> org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
>  at 
> org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
>  at 
> org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: Not an Avro data file
>  at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50)
>  at 
> org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178)
>  at 
> org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeHoodieCleanMetadata(TimelineMetadataUtils.java:152)
>  at 
> org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsForCleanByCommits(CleanPlanner.java:150)
>  at 
> org.apache.hudi.table.action.clean.CleanPlanner.getPartitionPathsToClean(CleanPlanner.java:126)
>  at 
> org.apache.hudi.table.action.clean.BaseCleanPlanActionExecutor.requestClean(BaseCleanPlanActionExecutor.java:73)
>  ... 24 more
> ```
>  
> ```scala
> o.a.h.t.a.clean.BaseCleanActionExecutor Failed to perform previous clean 
> operation, instant: [==>20211011143809__clean__REQUESTED]
> org.apache.hudi.exception.HoodieIOException: Not an Avro data file
>  at 
> org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:87)
>  at 
> org.apache.hudi.table.action.clean.BaseCleanActionExecutor.lambda$execute$0(BaseCleanActionExecutor.java:137)
>  at java.util.ArrayList.forEach(ArrayList.java:1257)
>  at 
> org.apache.hudi.table.action.clean.BaseCleanActionExecutor.execute(BaseCleanActionExecutor.java:134)
>  at 
> org.apache.hudi.table.HoodieJavaCopyOnWriteTable.clean(HoodieJavaCopyOnWriteTable.java:188)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:660)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:641)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.clean(AbstractHoodieWriteClient.java:672)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.autoCleanOnCommit(AbstractHoodieWriteClient.java:505)
>  at 
> org.apache.hudi.client.AbstractHoodieWriteClient.postCommit(AbstractHoodieWriteClient.java:440)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.postWrite(HoodieJavaWriteClient.java:187)
>  at 
> org.apache.hudi.client.HoodieJavaWriteClient.insert(HoodieJavaWriteClient.java:129)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.write(JavaHudi.java:401)
>  at org.apache.nifi.processors.javaHudi.JavaHudi.onTrigger(JavaHudi.java:305)
>  at 
> org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
>  at 
> org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1166)
>  at 
> org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:208)
>  at 
> org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
>  at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: Not an Avro data file
>  at org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:50)
>  at 
> org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178)
>  at 
> org.apache.hudi.common.util.CleanerUtils.getCleanerPlan(CleanerUtils.java:106)
>  at 
> org.apache.hudi.table.action.clean.BaseCleanActionExecutor.runPendingClean(BaseCleanActionExecutor.java:84)
>  ... 24 common frames omitted
> ```
>  
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to