[
https://issues.apache.org/jira/browse/ZOOKEEPER-1936?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15398609#comment-15398609
]
Nicholas DiPiazza edited comment on ZOOKEEPER-1936 at 7/29/16 4:43 AM:
-----------------------------------------------------------------------
The v1 and v2 patches apply cleanly with no changes. v3 doesn't:
{code}
branch-3.4
{code}
{code}
patch -p0 < ZOOKEEPER-1936.v3.patch
patching file
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
Hunk #1 FAILED at 101.
Hunk #2 FAILED at 117.
2 out of 2 hunks FAILED -- saving rejects to file
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java.rej
{code}
{code}
cat
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java.rej
--- src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -101,7 +101,7 @@ public FileTxnSnapLog(File dataDir, File snapDir) throws
IOException {
+ " is false). Please create this directory
manually.");
}
- if (!this.dataDir.mkdirs()) {
+ if (!this.dataDir.mkdirs() && !this.dataDir.exists()) {
throw new DatadirException("Unable to create data directory "
+ this.dataDir);
}
@@ -117,7 +117,7 @@ public FileTxnSnapLog(File dataDir, File snapDir) throws
IOException {
+ " is false). Please create this directory
manually.");
}
- if (!this.snapDir.mkdirs()) {
+ if (!this.snapDir.mkdirs() && !this.snapDir.exists()) {
throw new DatadirException("Unable to create snap directory "
+ this.snapDir);
}
{code}
My attempt at patching 3.4.9:
{code}
diff --git
a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
index b1682c3..3f26b0a 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
@@ -355,7 +355,20 @@
* logs
*/
public TxnIterator read(long zxid) throws IOException {
- return new FileTxnIterator(logDir, zxid);
+ return read(zxid, true);
+ }
+
+ /**
+ * start reading all the transactions from the given zxid.
+ *
+ * @param zxid the zxid to start reading transactions from
+ * @param fastForward true if the iterator should be fast forwarded to
point
+ * to the txn of a given zxid, else the iterator will point to the
+ * starting txn of a txnlog that may contain txn of a given zxid
+ * @return returns an iterator to iterate through the transaction logs
+ */
+ public TxnIterator read(long zxid, boolean fastForward) throws IOException
{
+ return new FileTxnIterator(logDir, zxid, fastForward);
}
/**
@@ -375,10 +388,10 @@
}
long pos = input.getPosition();
// now, truncate at the current position
- RandomAccessFile raf = new RandomAccessFile(itr.logFile, "rw");
+ RandomAccessFile raf=new RandomAccessFile(itr.logFile,"rw");
raf.setLength(pos);
raf.close();
- while (itr.goToNextLog()) {
+ while(itr.goToNextLog()) {
if (!itr.logFile.delete()) {
LOG.warn("Unable to truncate {}", itr.logFile);
}
@@ -523,12 +536,34 @@
* create an iterator over a transaction database directory
* @param logDir the transaction database directory
* @param zxid the zxid to start reading from
+ * @param fastForward true if the iterator should be fast forwarded
to
+ * point to the txn of a given zxid, else the iterator will
+ * point to the starting txn of a txnlog that may contain txn of
+ * a given zxid
+ * @throws IOException
+ */
+ public FileTxnIterator(File logDir, long zxid, boolean fastForward)
+ throws IOException {
+ this.logDir = logDir;
+ this.zxid = zxid;
+ init();
+
+ if (fastForward && hdr != null) {
+ while (hdr.getZxid() < zxid) {
+ if (!next())
+ break;
+ }
+ }
+ }
+
+ /**
+ * create an iterator over a transaction database directory
+ * @param logDir the transaction database directory
+ * @param zxid the zxid to start reading from
* @throws IOException
*/
public FileTxnIterator(File logDir, long zxid) throws IOException {
- this.logDir = logDir;
- this.zxid = zxid;
- init();
+ this(logDir, zxid, true);
}
/**
@@ -552,10 +587,17 @@
goToNextLog();
if (!next())
return;
- while (hdr.getZxid() < zxid) {
- if (!next())
- return;
+ }
+
+ /**
+ * Return total storage size of txnlog that will return by this
iterator.
+ */
+ public long getStorageSize() {
+ long sum = 0;
+ for (File f : storedFiles) {
+ sum += f.length();
}
+ return sum;
}
/**
@@ -637,8 +679,6 @@
crc.update(bytes, 0, bytes.length);
if (crcValue != crc.getValue())
throw new IOException(CRC_ERROR);
- if (bytes == null || bytes.length == 0)
- return false;
hdr = new TxnHeader();
record = SerializeUtils.deserializeTxn(bytes, hdr);
} catch (EOFException e) {
diff --git
a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index 6f0df51..98d94b4 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -58,6 +58,10 @@
private static final Logger LOG =
LoggerFactory.getLogger(FileTxnSnapLog.class);
+ public static final String ZOOKEEPER_DATADIR_AUTOCREATE =
+ "zookeeper.datadir.autocreate";
+
+ public static final String ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT = "true";
/**
* This listener helps
* the external apis calling
@@ -80,15 +84,38 @@
this.dataDir = new File(dataDir, version + VERSION);
this.snapDir = new File(snapDir, version + VERSION);
+ // by default create snap/log dirs, but otherwise complain instead
+ // See ZOOKEEPER-1161 for more details
+ boolean enableAutocreate = Boolean.valueOf(
+ System.getProperty(ZOOKEEPER_DATADIR_AUTOCREATE,
+ ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT));
if (!this.dataDir.exists()) {
- if (!this.dataDir.mkdirs()) {
- throw new IOException("Unable to create data directory "
+ if (!enableAutocreate) {
+ throw new DatadirException("Missing data directory "
+ + this.dataDir
+ + ", automatic data directory creation is disabled ("
+ + ZOOKEEPER_DATADIR_AUTOCREATE
+ + " is false). Please create this directory
manually.");
+ }
+
+ if (!this.dataDir.mkdirs() && !this.dataDir.exists()) {
+ throw new DatadirException("Unable to create data directory "
+ this.dataDir);
}
}
if (!this.snapDir.exists()) {
- if (!this.snapDir.mkdirs()) {
- throw new IOException("Unable to create snap directory "
+ // by default create this directory, but otherwise complain instead
+ // See ZOOKEEPER-1161 for more details
+ if (!enableAutocreate) {
+ throw new DatadirException("Missing snap directory "
+ + this.snapDir
+ + ", automatic data directory creation is disabled ("
+ + ZOOKEEPER_DATADIR_AUTOCREATE
+ + " is false). Please create this directory
manually.");
+ }
+
+ if (!this.snapDir.mkdirs() && !this.snapDir.exists()) {
+ throw new DatadirException("Unable to create snap directory "
+ this.snapDir);
}
}
@@ -164,6 +191,33 @@
}
}
return highestZxid;
+ }
+
+ /**
+ * Get TxnIterator for iterating through txnlog starting at a given zxid
+ *
+ * @param zxid starting zxid
+ * @return TxnIterator
+ * @throws IOException
+ */
+ public TxnIterator readTxnLog(long zxid) throws IOException {
+ return readTxnLog(zxid, true);
+ }
+
+ /**
+ * Get TxnIterator for iterating through txnlog starting at a given zxid
+ *
+ * @param zxid starting zxid
+ * @param fastForward true if the iterator should be fast forwarded to
point
+ * to the txn of a given zxid, else the iterator will point to the
+ * starting txn of a txnlog that may contain txn of a given zxid
+ * @return TxnIterator
+ * @throws IOException
+ */
+ public TxnIterator readTxnLog(long zxid, boolean fastForward)
+ throws IOException {
+ FileTxnLog txnLog = new FileTxnLog(dataDir);
+ return txnLog.read(zxid, fastForward);
}
/**
@@ -338,4 +392,14 @@
txnLog.close();
snapLog.close();
}
+
+ @SuppressWarnings("serial")
+ public static class DatadirException extends IOException {
+ public DatadirException(String msg) {
+ super(msg);
+ }
+ public DatadirException(String msg, Exception e) {
+ super(msg, e);
+ }
+ }
}
{code}
*UPDATE:* Testing 3.4.9 with this patch looks OK to me. You might have
squashed this issue.
was (Author: nicholas.dipiazza):
v1 and v2 patch with no changes. v3 doesn't:
{code}
branch-3.4
{code}
{code}
patch -p0 < ZOOKEEPER-1936.v3.patch
patching file
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
Hunk #1 FAILED at 101.
Hunk #2 FAILED at 117.
2 out of 2 hunks FAILED -- saving rejects to file
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java.rej
{code}
{code}
cat
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java.rej
--- src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -101,7 +101,7 @@ public FileTxnSnapLog(File dataDir, File snapDir) throws
IOException {
+ " is false). Please create this directory
manually.");
}
- if (!this.dataDir.mkdirs()) {
+ if (!this.dataDir.mkdirs() && !this.dataDir.exists()) {
throw new DatadirException("Unable to create data directory "
+ this.dataDir);
}
@@ -117,7 +117,7 @@ public FileTxnSnapLog(File dataDir, File snapDir) throws
IOException {
+ " is false). Please create this directory
manually.");
}
- if (!this.snapDir.mkdirs()) {
+ if (!this.snapDir.mkdirs() && !this.snapDir.exists()) {
throw new DatadirException("Unable to create snap directory "
+ this.snapDir);
}
{code}
My attempt at patching 3.4.9:
{code}
diff --git
a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
index b1682c3..3f26b0a 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnLog.java
@@ -355,7 +355,20 @@
* logs
*/
public TxnIterator read(long zxid) throws IOException {
- return new FileTxnIterator(logDir, zxid);
+ return read(zxid, true);
+ }
+
+ /**
+ * start reading all the transactions from the given zxid.
+ *
+ * @param zxid the zxid to start reading transactions from
+ * @param fastForward true if the iterator should be fast forwarded to
point
+ * to the txn of a given zxid, else the iterator will point to the
+ * starting txn of a txnlog that may contain txn of a given zxid
+ * @return returns an iterator to iterate through the transaction logs
+ */
+ public TxnIterator read(long zxid, boolean fastForward) throws IOException
{
+ return new FileTxnIterator(logDir, zxid, fastForward);
}
/**
@@ -375,10 +388,10 @@
}
long pos = input.getPosition();
// now, truncate at the current position
- RandomAccessFile raf = new RandomAccessFile(itr.logFile, "rw");
+ RandomAccessFile raf=new RandomAccessFile(itr.logFile,"rw");
raf.setLength(pos);
raf.close();
- while (itr.goToNextLog()) {
+ while(itr.goToNextLog()) {
if (!itr.logFile.delete()) {
LOG.warn("Unable to truncate {}", itr.logFile);
}
@@ -523,12 +536,34 @@
* create an iterator over a transaction database directory
* @param logDir the transaction database directory
* @param zxid the zxid to start reading from
+ * @param fastForward true if the iterator should be fast forwarded
to
+ * point to the txn of a given zxid, else the iterator will
+ * point to the starting txn of a txnlog that may contain txn of
+ * a given zxid
+ * @throws IOException
+ */
+ public FileTxnIterator(File logDir, long zxid, boolean fastForward)
+ throws IOException {
+ this.logDir = logDir;
+ this.zxid = zxid;
+ init();
+
+ if (fastForward && hdr != null) {
+ while (hdr.getZxid() < zxid) {
+ if (!next())
+ break;
+ }
+ }
+ }
+
+ /**
+ * create an iterator over a transaction database directory
+ * @param logDir the transaction database directory
+ * @param zxid the zxid to start reading from
* @throws IOException
*/
public FileTxnIterator(File logDir, long zxid) throws IOException {
- this.logDir = logDir;
- this.zxid = zxid;
- init();
+ this(logDir, zxid, true);
}
/**
@@ -552,10 +587,17 @@
goToNextLog();
if (!next())
return;
- while (hdr.getZxid() < zxid) {
- if (!next())
- return;
+ }
+
+ /**
+ * Return total storage size of txnlog that will return by this
iterator.
+ */
+ public long getStorageSize() {
+ long sum = 0;
+ for (File f : storedFiles) {
+ sum += f.length();
}
+ return sum;
}
/**
@@ -637,8 +679,6 @@
crc.update(bytes, 0, bytes.length);
if (crcValue != crc.getValue())
throw new IOException(CRC_ERROR);
- if (bytes == null || bytes.length == 0)
- return false;
hdr = new TxnHeader();
record = SerializeUtils.deserializeTxn(bytes, hdr);
} catch (EOFException e) {
diff --git
a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index 6f0df51..98d94b4 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -58,6 +58,10 @@
private static final Logger LOG =
LoggerFactory.getLogger(FileTxnSnapLog.class);
+ public static final String ZOOKEEPER_DATADIR_AUTOCREATE =
+ "zookeeper.datadir.autocreate";
+
+ public static final String ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT = "true";
/**
* This listener helps
* the external apis calling
@@ -80,15 +84,38 @@
this.dataDir = new File(dataDir, version + VERSION);
this.snapDir = new File(snapDir, version + VERSION);
+ // by default create snap/log dirs, but otherwise complain instead
+ // See ZOOKEEPER-1161 for more details
+ boolean enableAutocreate = Boolean.valueOf(
+ System.getProperty(ZOOKEEPER_DATADIR_AUTOCREATE,
+ ZOOKEEPER_DATADIR_AUTOCREATE_DEFAULT));
if (!this.dataDir.exists()) {
- if (!this.dataDir.mkdirs()) {
- throw new IOException("Unable to create data directory "
+ if (!enableAutocreate) {
+ throw new DatadirException("Missing data directory "
+ + this.dataDir
+ + ", automatic data directory creation is disabled ("
+ + ZOOKEEPER_DATADIR_AUTOCREATE
+ + " is false). Please create this directory
manually.");
+ }
+
+ if (!this.dataDir.mkdirs() && !this.dataDir.exists()) {
+ throw new DatadirException("Unable to create data directory "
+ this.dataDir);
}
}
if (!this.snapDir.exists()) {
- if (!this.snapDir.mkdirs()) {
- throw new IOException("Unable to create snap directory "
+ // by default create this directory, but otherwise complain instead
+ // See ZOOKEEPER-1161 for more details
+ if (!enableAutocreate) {
+ throw new DatadirException("Missing snap directory "
+ + this.snapDir
+ + ", automatic data directory creation is disabled ("
+ + ZOOKEEPER_DATADIR_AUTOCREATE
+ + " is false). Please create this directory
manually.");
+ }
+
+ if (!this.snapDir.mkdirs() && !this.snapDir.exists()) {
+ throw new DatadirException("Unable to create snap directory "
+ this.snapDir);
}
}
@@ -164,6 +191,33 @@
}
}
return highestZxid;
+ }
+
+ /**
+ * Get TxnIterator for iterating through txnlog starting at a given zxid
+ *
+ * @param zxid starting zxid
+ * @return TxnIterator
+ * @throws IOException
+ */
+ public TxnIterator readTxnLog(long zxid) throws IOException {
+ return readTxnLog(zxid, true);
+ }
+
+ /**
+ * Get TxnIterator for iterating through txnlog starting at a given zxid
+ *
+ * @param zxid starting zxid
+ * @param fastForward true if the iterator should be fast forwarded to
point
+ * to the txn of a given zxid, else the iterator will point to the
+ * starting txn of a txnlog that may contain txn of a given zxid
+ * @return TxnIterator
+ * @throws IOException
+ */
+ public TxnIterator readTxnLog(long zxid, boolean fastForward)
+ throws IOException {
+ FileTxnLog txnLog = new FileTxnLog(dataDir);
+ return txnLog.read(zxid, fastForward);
}
/**
@@ -338,4 +392,14 @@
txnLog.close();
snapLog.close();
}
+
+ @SuppressWarnings("serial")
+ public static class DatadirException extends IOException {
+ public DatadirException(String msg) {
+ super(msg);
+ }
+ public DatadirException(String msg, Exception e) {
+ super(msg, e);
+ }
+ }
}
{code}
*UPDATE:* Testing 3.9 with this patch looks ok for me. you might have squashed
this issue.
> Server exits when unable to create data directory due to race
> --------------------------------------------------------------
>
> Key: ZOOKEEPER-1936
> URL: https://issues.apache.org/jira/browse/ZOOKEEPER-1936
> Project: ZooKeeper
> Issue Type: Bug
> Components: server
> Affects Versions: 3.4.6, 3.5.0
> Reporter: Harald Musum
> Assignee: Andrew Purtell
> Priority: Minor
> Attachments: ZOOKEEPER-1936.patch, ZOOKEEPER-1936.v2.patch,
> ZOOKEEPER-1936.v3.patch, ZOOKEEPER-1936.v3.patch, ZOOKEEPER-1936.v4.patch
>
>
> We sometimes see the ZooKeeper server fail to start, with this error in
> the log:
> [2014-05-27 09:29:48.248] ERROR : -
> .org.apache.zookeeper.server.ZooKeeperServerMain Unexpected exception,
> exiting abnormally\nexception=\njava.io.IOException: Unable to create data
> directory /home/y/var/zookeeper/version-2\n\tat
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.<init>(FileTxnSnapLog.java:85)\n\tat
> org.apache.zookeeper.server.ZooKeeperServerMain.runFromConfig(ZooKeeperServerMain.java:103)\n\tat
> org.apache.zookeeper.server.ZooKeeperServerMain.initializeAndRun(ZooKeeperServerMain.java:86)\n\tat
> org.apache.zookeeper.server.ZooKeeperServerMain.main(ZooKeeperServerMain.java:52)\n\tat
> org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:116)\n\tat
> org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:78)\n\t
> [...]
> Stack trace from JVM gives this:
> "PurgeTask" daemon prio=10 tid=0x000000000201d000 nid=0x1727 runnable
> [0x00007f55d7dc7000]
> java.lang.Thread.State: RUNNABLE
> at java.io.UnixFileSystem.createDirectory(Native Method)
> at java.io.File.mkdir(File.java:1310)
> at java.io.File.mkdirs(File.java:1337)
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.<init>(FileTxnSnapLog.java:84)
> at org.apache.zookeeper.server.PurgeTxnLog.purge(PurgeTxnLog.java:68)
> at
> org.apache.zookeeper.server.DatadirCleanupManager$PurgeTask.run(DatadirCleanupManager.java:140)
> at java.util.TimerThread.mainLoop(Timer.java:555)
> at java.util.TimerThread.run(Timer.java:505)
> "zookeeper server" prio=10 tid=0x00000000027df800 nid=0x1715 runnable
> [0x00007f55d7ed8000]
> java.lang.Thread.State: RUNNABLE
> at java.io.UnixFileSystem.createDirectory(Native Method)
> at java.io.File.mkdir(File.java:1310)
> at java.io.File.mkdirs(File.java:1337)
> at
> org.apache.zookeeper.server.persistence.FileTxnSnapLog.<init>(FileTxnSnapLog.java:84)
> at
> org.apache.zookeeper.server.ZooKeeperServerMain.runFromConfig(ZooKeeperServerMain.java:103)
> at
> org.apache.zookeeper.server.ZooKeeperServerMain.initializeAndRun(ZooKeeperServerMain.java:86)
> at
> org.apache.zookeeper.server.ZooKeeperServerMain.main(ZooKeeperServerMain.java:52)
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:116)
> at
> org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:78)
> [...]
> So it seems that when autopurge is used (as it is in our case), the purge
> task might run at the same time as the server itself is starting. The
> FileTxnSnapLog() constructor checks whether the directory exists and creates
> it if not. When both threads do this at the same time, mkdir fails in one of
> them and the server exits the JVM.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)