This is an automated email from the ASF dual-hosted git repository.
mmerli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 8091096 Allow to bypass journal for writes (#2401)
8091096 is described below
commit 80910966388a9fa2ee4cd7fccbe07f7dee84ce44
Author: Matteo Merli <[email protected]>
AuthorDate: Sat May 8 06:27:35 2021 -0700
Allow to bypass journal for writes (#2401)
* Allow to bypass journal for writes
* Added unit test to validate the new config
* Fixed test variable names
---
.../java/org/apache/bookkeeper/bookie/Bookie.java | 8 ++
.../bookkeeper/conf/ServerConfiguration.java | 24 +++++
.../bookkeeper/bookie/BookieJournalBypassTest.java | 106 +++++++++++++++++++++
conf/bk_server.conf | 8 ++
4 files changed, 146 insertions(+)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java
index 87143f2..28c76f6 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/Bookie.java
@@ -150,6 +150,8 @@ public class Bookie extends BookieCriticalThread {
private final ByteBufAllocator allocator;
+ private final boolean writeDataToJournal;
+
@StatsDoc(
name = JOURNAL_MEMORY_MAX,
help = "The max amount of memory in bytes that can be used by the
bookie journal"
@@ -721,6 +723,7 @@ public class Bookie extends BookieCriticalThread {
this.ledgerDirsManager = createLedgerDirsManager(conf, diskChecker,
statsLogger.scope(LD_LEDGER_SCOPE));
this.indexDirsManager = createIndexDirsManager(conf, diskChecker,
statsLogger.scope(LD_INDEX_SCOPE),
this.ledgerDirsManager);
+ this.writeDataToJournal = conf.getJournalWriteData();
this.allocator = allocator;
// instantiate zookeeper client to initialize ledger manager
@@ -1338,6 +1341,11 @@ public class Bookie extends BookieCriticalThread {
}
}
+ if (!writeDataToJournal) {
+ cb.writeComplete(0, ledgerId, entryId, null, ctx);
+ return;
+ }
+
if (LOG.isTraceEnabled()) {
LOG.trace("Adding {}@{}", entryId, ledgerId);
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
index f5e28d1..00ef028 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
@@ -131,6 +131,7 @@ public class ServerConfiguration extends
AbstractConfiguration<ServerConfigurati
protected static final String MAX_JOURNAL_SIZE = "journalMaxSizeMB";
protected static final String MAX_BACKUP_JOURNALS = "journalMaxBackups";
protected static final String JOURNAL_SYNC_DATA = "journalSyncData";
+ protected static final String JOURNAL_WRITE_DATA = "journalWriteData";
protected static final String JOURNAL_ADAPTIVE_GROUP_WRITES =
"journalAdaptiveGroupWrites";
protected static final String JOURNAL_MAX_GROUP_WAIT_MSEC =
"journalMaxGroupWaitMSec";
protected static final String JOURNAL_BUFFERED_WRITES_THRESHOLD =
"journalBufferedWritesThreshold";
@@ -2108,6 +2109,29 @@ public class ServerConfiguration extends
AbstractConfiguration<ServerConfigurati
}
/**
+ * Should the data be written to journal before acknowledgment.
+ *
+ * <p>Default is true
+ *
+ * @return whether the data is written to the journal (true unless configured otherwise)
+ */
+ public boolean getJournalWriteData() {
+ return getBoolean(JOURNAL_WRITE_DATA, true);
+ }
+
+ /**
+ * Sets whether the data should be written to the journal before acknowledgment.
+ *
+ * <p>Default is true
+ *
+ * @return this server configuration, to allow chaining of calls
+ */
+ public ServerConfiguration setJournalWriteData(boolean journalWriteData) {
+ setProperty(JOURNAL_WRITE_DATA, journalWriteData);
+ return this;
+ }
+
+ /**
* Enable or disable journal syncs.
*
* <p>By default, data sync is enabled to guarantee durability of writes.
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalBypassTest.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalBypassTest.java
new file mode 100644
index 0000000..2d14a0f
--- /dev/null
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieJournalBypassTest.java
@@ -0,0 +1,106 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+package org.apache.bookkeeper.bookie;
+
+import static org.junit.Assert.assertEquals;
+import lombok.Cleanup;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.bookkeeper.client.BookKeeper;
+import org.apache.bookkeeper.client.api.WriteHandle;
+import org.apache.bookkeeper.conf.ClientConfiguration;
+import org.apache.bookkeeper.conf.ServerConfiguration;
+import org.apache.bookkeeper.proto.BookieServer;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.junit.Test;
+
+/**
+ * Tests that we're skipping journal when it's configured to do so.
+ */
+@Slf4j
+public class BookieJournalBypassTest extends BookKeeperClusterTestCase {
+
+ private int bookieIdx = 0;
+
+ public BookieJournalBypassTest() {
+ super(2);
+ }
+
+ @Override
+ protected BookieServer startBookie(ServerConfiguration conf) throws
Exception {
+ if (bookieIdx++ == 0) {
+ // First bookie will have the journal disabled
+ conf.setJournalWriteData(false);
+ }
+ return super.startBookie(conf);
+ }
+
+ @Test
+ public void testJournalBypass() throws Exception {
+ ClientConfiguration conf = new ClientConfiguration(baseClientConf);
+
+ Journal journal0 = bs.get(0).getBookie().journals.get(0);
+ LedgerStorage ls0 = bs.get(0).getBookie().getLedgerStorage();
+
+ Journal journal1 = bs.get(1).getBookie().journals.get(0);
+ LedgerStorage ls1 = bs.get(1).getBookie().getLedgerStorage();
+
+ ls0.flush();
+ ls1.flush();
+
+ long bk0OffsetBefore =
journal0.getLastLogMark().getCurMark().getLogFileOffset();
+ long bk1OffsetBefore =
journal1.getLastLogMark().getCurMark().getLogFileOffset();
+
+ writeEntries(conf);
+ ls0.flush();
+ ls1.flush();
+
+ long bk0OffsetAfter =
journal0.getLastLogMark().getCurMark().getLogFileOffset();
+ long bk1OffsetAfter =
journal1.getLastLogMark().getCurMark().getLogFileOffset();
+
+ int flushDelta = 10 * 1024;
+ int dataSize = 10 * 1024 * 1024;
+
+ // Offset for journal-0 will be very close to the previous point, just a few
KBs when flushing
+ assertEquals(bk0OffsetBefore, bk0OffsetAfter, flushDelta);
+
+ // Offset for journal-1 should have changed with the data size
+ assertEquals(bk1OffsetBefore + dataSize, bk1OffsetAfter, flushDelta);
+ }
+
+ private void writeEntries(ClientConfiguration conf)
+ throws Exception {
+ @Cleanup
+ BookKeeper bkc = new BookKeeper(conf);
+
+ @Cleanup
+ WriteHandle wh = bkc.newCreateLedgerOp()
+ .withEnsembleSize(2)
+ .withWriteQuorumSize(2)
+ .withAckQuorumSize(2)
+ .withPassword("".getBytes())
+ .execute()
+ .join();
+
+ for (int i = 0; i < 10; i++) {
+ wh.append(new byte[1024 * 1024]);
+ }
+ }
+}
diff --git a/conf/bk_server.conf b/conf/bk_server.conf
index 897ea33..95e5bc3 100755
--- a/conf/bk_server.conf
+++ b/conf/bk_server.conf
@@ -326,6 +326,14 @@ journalDirectories=/tmp/bk-txn
# Should we remove pages from page cache after force write
# journalRemoveFromPageCache=true
+# Should the data be written to the journal.
+# By default, data is written to the journal for durability of writes.
+# Beware: while disabling data journaling in the Bookie journal might improve
the bookie write performance, it will also
+# introduce the possibility of data loss. With no journal, the write
operations are passed to the storage engine
+# and then acknowledged. In case of power failure, the affected bookie might
lose the unflushed data. If the ledger
+# is replicated to multiple bookies, the chances of data loss are reduced
though still present.
+# journalWriteData=true
+
# Should the data be fsynced on journal before acknowledgment.
# By default, data sync is enabled to guarantee durability of writes.
# Beware: while disabling data sync in the Bookie journal might improve the
bookie write performance, it will also