Author: suresh
Date: Fri Sep 21 05:47:03 2012
New Revision: 1388330
URL: http://svn.apache.org/viewvc?rev=1388330&view=rev
Log:
HDFS-3596. Improve FSEditLog pre-allocation in branch-1. Contributed by Colin Patrick McCabe, backported by Jing Zhao.
Modified: hadoop/common/branches/branch-1.1/CHANGES.txt hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Modified: hadoop/common/branches/branch-1.1/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/CHANGES.txt?rev=1388330&r1=1388329&r2=1388330&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/CHANGES.txt (original) +++ hadoop/common/branches/branch-1.1/CHANGES.txt Fri Sep 21 05:47:03 2012 @@ -341,6 +341,9 @@ Release 1.1.0 - 2012.09.16 HDFS-3466. Get HTTP kerberos principal from the web authentication keytab. (omalley) + HDFS-3596. Improve FSEditLog pre-allocation in branch-1 + (Colin Patrick McCabe, backported by Jing Zhao via suresh) + Release 1.0.4 - Unreleased NEW FEATURES Modified: hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1388330&r1=1388329&r2=1388330&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original) +++ hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Fri Sep 21 05:47:03 2012 @@ -37,6 +37,8 @@ import java.lang.Math; import java.nio.channels.FileChannel; import java.nio.ByteBuffer; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.FSConstants; @@ -55,6 +57,8 @@ import org.apache.hadoop.security.token. 
* */ public class FSEditLog { + private static final Log LOG = LogFactory.getLog(FSEditLog.class); + private static final byte OP_INVALID = -1; private static final byte OP_ADD = 0; private static final byte OP_RENAME = 1; // rename @@ -80,6 +84,8 @@ public class FSEditLog { private static final byte OP_UPDATE_MASTER_KEY = 21; //update master key private static int sizeFlushBuffer = 512*1024; + /** Preallocation length in bytes for writing edit log. */ + static final int MIN_PREALLOCATION_LENGTH = 1024 * 1024; private ArrayList<EditLogOutputStream> editStreams = null; private FSImage fsimage = null; @@ -121,13 +127,22 @@ public class FSEditLog { * An implementation of the abstract class {@link EditLogOutputStream}, * which stores edits in a local file. */ - static private class EditLogFileOutputStream extends EditLogOutputStream { + static class EditLogFileOutputStream extends EditLogOutputStream { + /** Preallocation buffer, padded with OP_INVALID */ + private static final ByteBuffer PREALLOCATION_BUFFER + = ByteBuffer.allocateDirect(MIN_PREALLOCATION_LENGTH); + static { + PREALLOCATION_BUFFER.position(0).limit(MIN_PREALLOCATION_LENGTH); + for(int i = 0; i < PREALLOCATION_BUFFER.capacity(); i++) { + PREALLOCATION_BUFFER.put(OP_INVALID); + } + } + private File file; private FileOutputStream fp; // file stream for storing edit logs private FileChannel fc; // channel of the file stream for sync private DataOutputBuffer bufCurrent; // current buffer for writing private DataOutputBuffer bufReady; // buffer ready for flushing - static ByteBuffer fill = ByteBuffer.allocateDirect(512); // preallocation EditLogFileOutputStream(File name) throws IOException { super(); @@ -174,6 +189,8 @@ public class FSEditLog { @Override public void close() throws IOException { + LOG.info("closing edit log: position=" + fc.position() + ", editlog=" + getName()); + // close should have been called after all pending transactions // have been flushed & synced. 
int bufSize = bufCurrent.size(); @@ -185,11 +202,13 @@ public class FSEditLog { bufCurrent.close(); bufReady.close(); - // remove the last INVALID marker from transaction log. + // remove any preallocated padding bytes from the transaction log. fc.truncate(fc.position()); fp.close(); bufCurrent = bufReady = null; + + LOG.info("close success: truncate to " + file.length() + ", editlog=" + getName()); } /** @@ -199,7 +218,6 @@ public class FSEditLog { @Override void setReadyToFlush() throws IOException { assert bufReady.size() == 0 : "previous data is not flushed yet"; - write(OP_INVALID); // insert end-of-file marker DataOutputBuffer tmp = bufReady; bufReady = bufCurrent; bufCurrent = tmp; @@ -216,7 +234,6 @@ public class FSEditLog { bufReady.writeTo(fp); // write data to file bufReady.reset(); // erase all data in the buffer fc.force(false); // metadata updates not needed because of preallocation - fc.position(fc.position()-1); // skip back the end-of-file marker } /** @@ -230,16 +247,26 @@ public class FSEditLog { // allocate a big chunk of data private void preallocate() throws IOException { - long position = fc.position(); - if (position + 4096 >= fc.size()) { - FSNamesystem.LOG.debug("Preallocating Edit log, current size " + - fc.size()); - long newsize = position + 1024*1024; // 1MB - fill.position(0); - int written = fc.write(fill, newsize); - FSNamesystem.LOG.debug("Edit log size is now " + fc.size() + - " written " + written + " bytes " + - " at offset " + newsize); + long size = fc.size(); + int bufSize = bufReady.getLength(); + long need = bufSize - (size - fc.position()); + if (need <= 0) { + return; + } + long oldSize = size; + long total = 0; + long fillCapacity = PREALLOCATION_BUFFER.capacity(); + while (need > 0) { + PREALLOCATION_BUFFER.position(0); + do { + size += fc.write(PREALLOCATION_BUFFER, size); + } while (PREALLOCATION_BUFFER.remaining() > 0); + need -= fillCapacity; + total += fillCapacity; + } + if(FSNamesystem.LOG.isDebugEnabled()) { + 
FSNamesystem.LOG.debug("Preallocated " + total + " bytes at the end of " + + "the edit log (offset " + oldSize + ")"); } } Modified: hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1388330&r1=1388329&r2=1388330&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java (original) +++ hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Fri Sep 21 05:47:03 2012 @@ -76,6 +76,36 @@ public class TestEditLog extends TestCas } } } + + public void testEditLogPreallocation() throws IOException { + final File TEST_DIR = + new File(System.getProperty("test.build.data", "/tmp")); + final File TEST_EDITS = new File(TEST_DIR, "edit_log"); + + FSEditLog.EditLogFileOutputStream elfos = null; + try { + elfos = new FSEditLog.EditLogFileOutputStream(TEST_EDITS); + byte b[] = new byte[1024]; + for (int i = 0; i < b.length; i++) { + b[i] = 0; + } + elfos.write(b); + elfos.setReadyToFlush(); + elfos.flushAndSync(); + assertEquals(FSEditLog.MIN_PREALLOCATION_LENGTH, + elfos.getFile().length()); + for (int i = 0; + i < 2 * FSEditLog.MIN_PREALLOCATION_LENGTH / b.length; i++) { + elfos.write(b); + elfos.setReadyToFlush(); + elfos.flushAndSync(); + } + assertEquals(3 * FSEditLog.MIN_PREALLOCATION_LENGTH, elfos.getFile().length()); + } finally { + if (elfos != null) elfos.close(); + if (TEST_EDITS.exists()) TEST_EDITS.delete(); + } + } /** * Tests transaction logging in dfs.