Author: jbellis
Date: Tue Nov 30 18:45:08 2010
New Revision: 1040691

URL: http://svn.apache.org/viewvc?rev=1040691&view=rev
Log:
add flush for each append to periodic commitlog mode; added periodic_without_flush option to disable this.
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-1780

Modified: cassandra/branches/cassandra-0.7/CHANGES.txt cassandra/branches/cassandra-0.7/conf/cassandra.yaml cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLog.java cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLogSegment.java cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/io/util/BufferedRandomAccessFile.java Modified: cassandra/branches/cassandra-0.7/CHANGES.txt URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/CHANGES.txt?rev=1040691&r1=1040690&r2=1040691&view=diff ============================================================================== --- cassandra/branches/cassandra-0.7/CHANGES.txt (original) +++ cassandra/branches/cassandra-0.7/CHANGES.txt Tue Nov 30 18:45:08 2010 @@ -18,6 +18,8 @@ dev defined comparator, and decode properly in cli (CASSANDRA-1773) * use cross-platform newlines in cli (CASSANDRA-1786) * add ExpiringColumn support to sstable import/export (CASSANDRA-1754) + * add flush for each append to periodic commitlog mode; added + periodic_without_flush option to disable this (CASSANDRA-1780) 0.7.0-rc1 Modified: cassandra/branches/cassandra-0.7/conf/cassandra.yaml URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/conf/cassandra.yaml?rev=1040691&r1=1040690&r2=1040691&view=diff ============================================================================== --- cassandra/branches/cassandra-0.7/conf/cassandra.yaml (original) +++ cassandra/branches/cassandra-0.7/conf/cassandra.yaml Tue Nov 30 18:45:08 2010 @@ -61,17 +61,32 @@ saved_caches_directory: /var/lib/cassand # Size to allow commitlog to grow to before creating a new segment commitlog_rotation_threshold_in_mb: 128 -# commitlog_sync may be either "periodic" or "batch." -# When in batch mode, Cassandra won't ack writes until the commit log -# has been fsynced to disk. 
It will wait up to -# CommitLogSyncBatchWindowInMS milliseconds for other writes, before -# performing the sync. +# commitlog_sync supports the following modes: +# +# batch: +# In batch mode, Cassandra won't ack writes until the commit log +# has been fsynced to disk. But fsyncing each write at once is +# performance-prohibitive, so instead Cassandra will wait up to +# commitlog_sync_batch_window_in_ms milliseconds for other writes, before +# syncing that "batch" at once. This causes a performance penalty +# of about 15% when the commitlog is on a separate device, and much more +# when it shares the same device as the data files. +# +# periodic: +# Writes may be acked immediately (without waiting for the commitlog +# append) and the CommitLog is simply synced every +# commitlog_sync_period_in_ms milliseconds. +# +# periodic_without_flush: +# Like periodic, but the commitlog write buffer is only flushed +# before the sync, so any interruption to the process can be +# expected to lose some writes. This is the old 0.6 periodic +# behavior and will be removed in future versions if testing +# continues to show no performance benefit over normal periodic. commitlog_sync: periodic - -# the other option is "timed," where writes may be acked immediately -# and the CommitLog is simply synced every commitlog_sync_period_in_ms -# milliseconds. commitlog_sync_period_in_ms: 10000 +# commitlog_sync: batch +# commitlog_sync_batch_window_in_ms: 10 # Addresses of hosts that are deemed contact points. 
# Cassandra nodes use this list of hosts to find each other and learn Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java?rev=1040691&r1=1040690&r2=1040691&view=diff ============================================================================== --- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java (original) +++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java Tue Nov 30 18:45:08 2010 @@ -105,7 +105,8 @@ public class Config public static enum CommitLogSync { periodic, - batch + batch, + periodic_without_flush } public static enum DiskAccessMode { Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLog.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLog.java?rev=1040691&r1=1040690&r2=1040691&view=diff ============================================================================== --- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLog.java (original) +++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLog.java Tue Nov 30 18:45:08 2010 @@ -112,7 +112,11 @@ public class CommitLog // All we need to do is create a new one. 
segments.add(new CommitLogSegment()); - if (DatabaseDescriptor.getCommitLogSync() == Config.CommitLogSync.periodic) + if (DatabaseDescriptor.getCommitLogSync() == Config.CommitLogSync.batch) + { + executor = new BatchCommitLogExecutorService(); + } + else { executor = new PeriodicCommitLogExecutorService(); final Callable syncer = new Callable() @@ -147,10 +151,6 @@ public class CommitLog } }, "PERIODIC-COMMIT-LOG-SYNCER").start(); } - else - { - executor = new BatchCommitLogExecutorService(); - } } public void resetUnsafe() @@ -490,6 +490,7 @@ public class CommitLog // TODO this should be a Runnable since it doesn't actually return anything, but it's difficult to do that // without breaking the fragile CheaterFutureTask in BatchCLES. + final static boolean flushEachWrite = DatabaseDescriptor.getCommitLogSync() == Config.CommitLogSync.periodic; class LogRecordAdder implements Callable, Runnable { final RowMutation rowMutation; @@ -512,6 +513,10 @@ public class CommitLog sync(); segments.add(new CommitLogSegment()); } + else if (flushEachWrite) + { + currentSegment().flush(); + } } catch (IOException e) { Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLogSegment.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLogSegment.java?rev=1040691&r1=1040690&r2=1040691&view=diff ============================================================================== --- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLogSegment.java (original) +++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/commitlog/CommitLogSegment.java Tue Nov 30 18:45:08 2010 @@ -140,6 +140,11 @@ public class CommitLogSegment logWriter.sync(); } + public void flush() throws IOException + { + logWriter.flush(); + } + public CommitLogContext getContext() { return new CommitLogContext(logWriter.getFilePointer()); Modified: 
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/io/util/BufferedRandomAccessFile.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/io/util/BufferedRandomAccessFile.java?rev=1040691&r1=1040690&r2=1040691&view=diff ============================================================================== --- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/io/util/BufferedRandomAccessFile.java (original) +++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/io/util/BufferedRandomAccessFile.java Tue Nov 30 18:45:08 2010 @@ -19,10 +19,8 @@ package org.apache.cassandra.io.util; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.RandomAccessFile; -import java.util.Arrays; /** * A <code>BufferedRandomAccessFile</code> is like a @@ -168,7 +166,7 @@ public class BufferedRandomAccessFile ex { if (syncNeeded_) { - flushBuffer(); + flush(); getChannel().force(true); // true, because file length counts as "metadata" syncNeeded_ = false; } @@ -182,7 +180,7 @@ public class BufferedRandomAccessFile ex } /* Flush any dirty bytes in the buffer to disk. */ - private void flushBuffer() throws IOException + public void flush() throws IOException { if (this.dirty_) { @@ -229,7 +227,7 @@ public class BufferedRandomAccessFile ex */ private void reBuffer() throws IOException { - this.flushBuffer(); + this.flush(); this.lo_ = this.curr_; this.maxHi_ = this.lo_ + (long) this.buff_.length; if (this.diskPos_ != this.lo_)