[ 
https://issues.apache.org/jira/browse/KAFKA-10352?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Seongbae Chang resolved KAFKA-10352.
------------------------------------
    Resolution: Resolved

> Error while reading checkpoint file /tmp/kafka-logs/cleaner-offset-checkpoint 
> (kafka.server.LogDirFailureChannel)
> -----------------------------------------------------------------------------------------------------------------
>
>                 Key: KAFKA-10352
>                 URL: https://issues.apache.org/jira/browse/KAFKA-10352
>             Project: Kafka
>          Issue Type: Bug
>          Components: log cleaner
>            Reporter: Seongbae Chang
>            Priority: Critical
>
> One of my Kafka brokers (3 total, version 2.5.0) was shut down suddenly. 
> And then, the other brokers were also shut down because of similar causes.
>  
> The main cause of this problem is '*Error while reading checkpoint file 
> /tmp/kafka-logs/cleaner-offset-checkpoint (kafka.server.LogDirFailureChannel)*
> *java.nio.file.NoSuchFileException: 
> /tmp/kafka-logs/cleaner-offset-checkpoint*'
>  
> I don't know why this error occurs or how to solve it. Please give me 
> some answers or comments about it. Thank you.
> I have attached the contents of the log files, namely kafkaServer.out and 
> log-cleaner.log.
>  
> *kafkaServer.out*
> {code:java}
> [2020-07-30 19:49:05,992] INFO [GroupMetadataManager brokerId=3] Removed 0 
> expired offsets in 0 milliseconds. 
> (kafka.coordinator.group.GroupMetadataManager)[2020-07-30 19:49:05,992] INFO 
> [GroupMetadataManager brokerId=3] Removed 0 expired offsets in 0 
> milliseconds. (kafka.coordinator.group.GroupMetadataManager)[2020-07-30 
> 19:56:48,080] ERROR Error while reading checkpoint file 
> /tmp/kafka-logs/cleaner-offset-checkpoint 
> (kafka.server.LogDirFailureChannel)java.nio.file.NoSuchFileException: 
> /tmp/kafka-logs/cleaner-offset-checkpoint at 
> sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) at 
> sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) at 
> sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) at 
> sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214)
>  at java.nio.file.Files.newByteChannel(Files.java:361) at 
> java.nio.file.Files.newByteChannel(Files.java:407) at 
> java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:384)
>  at java.nio.file.Files.newInputStream(Files.java:152) at 
> java.nio.file.Files.newBufferedReader(Files.java:2784) at 
> java.nio.file.Files.newBufferedReader(Files.java:2816) at 
> kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:87)
>  at kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86) at 
> kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scala:61)
>  at 
> kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$2(LogCleanerManager.scala:134)
>  at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:583) at 
> scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:597) at 
> scala.collection.mutable.ListBuffer.addAll(ListBuffer.scala:118) at 
> scala.collection.mutable.ListBuffer$.from(ListBuffer.scala:38) at 
> scala.collection.immutable.List$.from(List.scala:617) at 
> scala.collection.immutable.List$.from(List.scala:611) at 
> scala.collection.IterableFactory$Delegate.from(Factory.scala:288) at 
> scala.collection.immutable.Iterable$.from(Iterable.scala:35) at 
> scala.collection.immutable.Iterable$.from(Iterable.scala:32) at 
> scala.collection.IterableFactory$Delegate.from(Factory.scala:288) at 
> scala.collection.IterableOps.flatMap(Iterable.scala:674) at 
> scala.collection.IterableOps.flatMap$(Iterable.scala:674) at 
> scala.collection.AbstractIterable.flatMap(Iterable.scala:921) at 
> kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$1(LogCleanerManager.scala:132)
>  at 
> kafka.log.LogCleanerManager.allCleanerCheckpoints(LogCleanerManager.scala:140)
>  at 
> kafka.log.LogCleanerManager.$anonfun$grabFilthiestCompactedLog$1(LogCleanerManager.scala:171)
>  at 
> kafka.log.LogCleanerManager.grabFilthiestCompactedLog(LogCleanerManager.scala:168)
>  at 
> kafka.log.LogCleaner$CleanerThread.cleanFilthiestLog(LogCleaner.scala:327) at 
> kafka.log.LogCleaner$CleanerThread.tryCleanFilthiestLog(LogCleaner.scala:314) 
> at kafka.log.LogCleaner$CleanerThread.doWork(LogCleaner.scala:303) at 
> kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:96)[2020-07-30 
> 19:56:48,083] WARN [ReplicaManager broker=3] Stopping serving replicas in dir 
> /tmp/kafka-logs (kafka.server.ReplicaManager)[2020-07-30 19:56:48,086] INFO 
> [ReplicaFetcherManager on broker 3] Removed fetcher for partitions 
> HashSet(__consumer_offsets-8, sbchang.test.partition-0, 
> __consumer_offsets-47, sbchang.test.partition-2, sbchang.test.header-2, 
> configtest-0, __ispossible-0, __consumer_offsets-32, __consumer_offsets-35, 
> temp-iot-0, __consumer_offsets-41, __consumer_offsets-23, 
> test-security-sasl-plain-001-0, __consumer_offsets-38, __consumer_offsets-17, 
> test-security-ssl-001-0, sbchang.test.header-1, __consumer_offsets-11, 
> __consumer_offsets-2, __consumer_offsets-14, resource-v1-CloudIoTCore-Rule-0, 
> __consumer_offsets-20, __consumer_offsets-44, app001-transform-my001-0, 
> sbchang.test.header-0, __consumer_offsets-5, __consumer_offsets-26, 
> __consumer_offsets-29, sbchang.test.partition-1) 
> (kafka.server.ReplicaFetcherManager)[2020-07-30 19:56:48,086] INFO 
> [ReplicaAlterLogDirsManager on broker 3] Removed fetcher for partitions 
> HashSet(__consumer_offsets-8, sbchang.test.partition-0, 
> __consumer_offsets-47, sbchang.test.partition-2, sbchang.test.header-2, 
> configtest-0, __ispossible-0, __consumer_offsets-32, __consumer_offsets-35, 
> temp-iot-0, __consumer_offsets-41, __consumer_offsets-23, 
> test-security-sasl-plain-001-0, __consumer_offsets-38, __consumer_offsets-17, 
> test-security-ssl-001-0, sbchang.test.header-1, __consumer_offsets-11, 
> __consumer_offsets-2, __consumer_offsets-14, resource-v1-CloudIoTCore-Rule-0, 
> __consumer_offsets-20, __consumer_offsets-44, app001-transform-my001-0, 
> sbchang.test.header-0, __consumer_offsets-5, __consumer_offsets-26, 
> __consumer_offsets-29, sbchang.test.partition-1) 
> (kafka.server.ReplicaAlterLogDirsManager)[2020-07-30 19:56:48,096] WARN 
> [ReplicaManager broker=3] Broker 3 stopped fetcher for partitions 
> __consumer_offsets-8,sbchang.test.partition-0,__consumer_offsets-47,sbchang.test.partition-2,sbchang.test.header-2,configtest-0,__ispossible-0,__consumer_offsets-32,__consumer_offsets-35,temp-iot-0,__consumer_offsets-41,__consumer_offsets-23,test-security-sasl-plain-001-0,__consumer_offsets-38,__consumer_offsets-17,test-security-ssl-001-0,sbchang.test.header-1,__consumer_offsets-11,__consumer_offsets-2,__consumer_offsets-14,resource-v1-CloudIoTCore-Rule-0,__consumer_offsets-20,__consumer_offsets-44,app001-transform-my001-0,sbchang.test.header-0,__consumer_offsets-5,__consumer_offsets-26,__consumer_offsets-29,sbchang.test.partition-1
>  and stopped moving logs for partitions  because they are in the failed log 
> directory /tmp/kafka-logs. (kafka.server.ReplicaManager)[2020-07-30 
> 19:56:48,096] WARN Stopping serving logs in dir /tmp/kafka-logs 
> (kafka.log.LogManager)[2020-07-30 19:56:48,098] ERROR Shutdown broker because 
> all log dirs in /tmp/kafka-logs have failed (kafka.log.LogManager)
> {code}
>  
> *log-cleaner.log*
> {code:java}
> [2020-07-30 19:56:48,083] ERROR Failed to access checkpoint file 
> cleaner-offset-checkpoint in dir /tmp/kafka-logs 
> (kafka.log.LogCleaner)[2020-07-30 19:56:48,083] ERROR Failed to access 
> checkpoint file cleaner-offset-checkpoint in dir /tmp/kafka-logs 
> (kafka.log.LogCleaner)org.apache.kafka.common.errors.KafkaStorageException: 
> Error while reading checkpoint file 
> /tmp/kafka-logs/cleaner-offset-checkpointCaused by: 
> java.nio.file.NoSuchFileException: /tmp/kafka-logs/cleaner-offset-checkpoint 
> at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) at 
> sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) at 
> sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) at 
> sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214)
>  at java.nio.file.Files.newByteChannel(Files.java:361) at 
> java.nio.file.Files.newByteChannel(Files.java:407) at 
> java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:384)
>  at java.nio.file.Files.newInputStream(Files.java:152) at 
> java.nio.file.Files.newBufferedReader(Files.java:2784) at 
> java.nio.file.Files.newBufferedReader(Files.java:2816) at 
> kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:87)
>  at kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86) at 
> kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scala:61)
>  at 
> kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$2(LogCleanerManager.scala:134)
>  at scala.collection.Iterator$$anon$10.nextCur(Iterator.scala:583) at 
> scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:597) at 
> scala.collection.mutable.ListBuffer.addAll(ListBuffer.scala:118) at 
> scala.collection.mutable.ListBuffer$.from(ListBuffer.scala:38) at 
> scala.collection.immutable.List$.from(List.scala:617) at 
> scala.collection.immutable.List$.from(List.scala:611) at 
> scala.collection.IterableFactory$Delegate.from(Factory.scala:288) at 
> scala.collection.immutable.Iterable$.from(Iterable.scala:35) at 
> scala.collection.immutable.Iterable$.from(Iterable.scala:32) at 
> scala.collection.IterableFactory$Delegate.from(Factory.scala:288) at 
> scala.collection.IterableOps.flatMap(Iterable.scala:674) at 
> scala.collection.IterableOps.flatMap$(Iterable.scala:674) at 
> scala.collection.AbstractIterable.flatMap(Iterable.scala:921) at 
> kafka.log.LogCleanerManager.$anonfun$allCleanerCheckpoints$1(LogCleanerManager.scala:132)
>  at 
> kafka.log.LogCleanerManager.allCleanerCheckpoints(LogCleanerManager.scala:140)
>  at 
> kafka.log.LogCleanerManager.$anonfun$grabFilthiestCompactedLog$1(LogCleanerManager.scala:171)
>  at 
> kafka.log.LogCleanerManager.grabFilthiestCompactedLog(LogCleanerManager.scala:168)
>  at 
> kafka.log.LogCleaner$CleanerThread.cleanFilthiestLog(LogCleaner.scala:327) at 
> kafka.log.LogCleaner$CleanerThread.tryCleanFilthiestLog(LogCleaner.scala:314) 
> at kafka.log.LogCleaner$CleanerThread.doWork(LogCleaner.scala:303) at 
> kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:96)
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to