[
https://issues.apache.org/jira/browse/ZOOKEEPER-4813?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Kezhu Wang updated ZOOKEEPER-4813:
----------------------------------
Fix Version/s: (was: 3.9.3)
> Make zookeeper start successfully when the last log file is dirty during the
> restore progress
> ---------------------------------------------------------------------------------------------
>
> Key: ZOOKEEPER-4813
> URL: https://issues.apache.org/jira/browse/ZOOKEEPER-4813
> Project: ZooKeeper
> Issue Type: Improvement
> Components: server
> Affects Versions: 3.9.1
> Reporter: Yan Zhao
> Assignee: Yan Zhao
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1h
> Remaining Estimate: 0h
>
> When ZooKeeper restarts, it restores the data from the last valid
> snapshot file and replays the txn log to append data.
> But if the last log file is empty for some reason, the restore will fail,
> and ZooKeeper will not be able to restart.
> The logs are as follows:
> {noformat}
> 14:12:16.023 [main] INFO org.apache.zookeeper.server.persistence.SnapStream
> - Invalid snapshot snapshot.188700025d87. len = 761554294, byte = 45
> 14:12:16.024 [main] INFO org.apache.zookeeper.server.persistence.FileSnap -
> Reading snapshot /pulsar/data/zookeeper/version-2/snapshot.188700025a05
> 14:12:17.350 [main] INFO org.apache.zookeeper.server.DataTree - The digest
> in the snapshot has digest version of 2, with zxid as 0x188700025b07, and
> digest value as 510776662607117
> 14:12:17.492 [main] ERROR org.apache.zookeeper.server.quorum.QuorumPeer -
> Unable to load database on disk
> java.io.EOFException: null
> at java.io.DataInputStream.readInt(DataInputStream.java:386) ~[?:?]
> at
> org.apache.jute.BinaryInputArchive.readInt(BinaryInputArchive.java:96)
> ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileHeader.deserialize(FileHeader.java:67)
> ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.inStreamCreated(FileTxnLog.java:725)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.createInputArchive(FileTxnLog.java:743)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.goToNextLog(FileTxnLog.java:711)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.next(FileTxnLog.java:792)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.fastForwardFromEdits(FileTxnSnapLog.java:361)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.lambda$restore$0(FileTxnSnapLog.java:267)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.restore(FileTxnSnapLog.java:312)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.ZKDatabase.loadDataBase(ZKDatabase.java:288)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeer.loadDataBase(QuorumPeer.java:1149)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeer.start(QuorumPeer.java:1135)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.runFromConfig(QuorumPeerMain.java:229)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:137)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:91)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> 14:12:17.502 [main] INFO
> org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider - Shutdown
> executor service with timeout 1000
> 14:12:17.508 [main] INFO org.eclipse.jetty.server.AbstractConnector -
> Stopped ServerConnector@2484f433{HTTP/1.1, (http/1.1)}{0.0.0.0:8000}
> 14:12:17.510 [main] INFO org.eclipse.jetty.server.handler.ContextHandler -
> Stopped o.e.j.s.ServletContextHandler@59a67c3a{/,null,STOPPED}
> 14:12:17.515 [main] ERROR org.apache.zookeeper.server.quorum.QuorumPeerMain -
> Unexpected exception, exiting abnormally
> java.lang.RuntimeException: Unable to run quorum server
> at
> org.apache.zookeeper.server.quorum.QuorumPeer.loadDataBase(QuorumPeer.java:1204)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeer.start(QuorumPeer.java:1135)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.runFromConfig(QuorumPeerMain.java:229)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:137)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:91)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> Caused by: java.io.EOFException
> at java.io.DataInputStream.readInt(DataInputStream.java:386) ~[?:?]
> at
> org.apache.jute.BinaryInputArchive.readInt(BinaryInputArchive.java:96)
> ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileHeader.deserialize(FileHeader.java:67)
> ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.inStreamCreated(FileTxnLog.java:725)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.createInputArchive(FileTxnLog.java:743)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.goToNextLog(FileTxnLog.java:711)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.next(FileTxnLog.java:792)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.fastForwardFromEdits(FileTxnSnapLog.java:361)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.lambda$restore$0(FileTxnSnapLog.java:267)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.restore(FileTxnSnapLog.java:312)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.ZKDatabase.loadDataBase(ZKDatabase.java:288)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> at
> org.apache.zookeeper.server.quorum.QuorumPeer.loadDataBase(QuorumPeer.java:1149)
> ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1]
> ... 4 more
> {noformat}
>
> In fact, if opening the last log file fails, we can ignore that log file.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)