Author: cmccabe
Date: Tue Mar 4 06:51:40 2014
New Revision: 1573888

URL: http://svn.apache.org/r1573888
Log:
HDFS-6046. add dfs.client.mmap.enabled (cmccabe)

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/ (props changed) hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/ (props changed) hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/ (props changed) hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-hdfs-project:r1573887 Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs:r1573887 Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1573888&r1=1573887&r2=1573888&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Mar 4 06:51:40 2014 @@ -126,6 +126,8 @@ Release 2.4.0 - UNRELEASED HDFS-5950. 
The DFSClient and DataNode should use shared memory segments to communicate short-circuit information. (cmccabe) + HDFS-6046. add dfs.client.mmap.enabled (cmccabe) + OPTIMIZATIONS HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java:r1573887 Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java?rev=1573888&r1=1573887&r2=1573888&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java Tue Mar 4 06:51:40 2014 @@ -281,6 +281,7 @@ public class DFSClient implements java.i final long shortCircuitStreamsCacheExpiryMs; final int shortCircuitSharedMemoryWatcherInterruptCheckMs; + final boolean shortCircuitMmapEnabled; final int shortCircuitMmapCacheSize; final long shortCircuitMmapCacheExpiryMs; final long shortCircuitMmapCacheRetryTimeout; @@ -400,6 +401,9 @@ public class DFSClient implements java.i shortCircuitStreamsCacheExpiryMs = conf.getLong( DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY, DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT); + shortCircuitMmapEnabled = conf.getBoolean( + DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED, + DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED_DEFAULT); shortCircuitMmapCacheSize = conf.getInt( DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE, 
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT); Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1573888&r1=1573887&r2=1573888&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java Tue Mar 4 06:51:40 2014 @@ -431,6 +431,8 @@ public class DFSConfigKeys extends Commo public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024; public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic"; public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false; + public static final String DFS_CLIENT_MMAP_ENABLED= "dfs.client.mmap.enabled"; + public static final boolean DFS_CLIENT_MMAP_ENABLED_DEFAULT = true; public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size"; public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 256; public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms"; Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java?rev=1573888&r1=1573887&r2=1573888&view=diff ============================================================================== --- 
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java Tue Mar 4 06:51:40 2014 @@ -1571,7 +1571,10 @@ implements ByteBufferReadable, CanSetDro "at position " + pos); } } - ByteBuffer buffer = tryReadZeroCopy(maxLength, opts); + ByteBuffer buffer = null; + if (dfsClient.getConf().shortCircuitMmapEnabled) { + buffer = tryReadZeroCopy(maxLength, opts); + } if (buffer != null) { return buffer; } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml?rev=1573888&r1=1573887&r2=1573888&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml Tue Mar 4 06:51:40 2014 @@ -1521,25 +1521,33 @@ </property> <property> + <name>dfs.client.mmap.enabled</name> + <value>true</value> + <description> + If this is set to false, the client won't attempt to perform memory-mapped reads. + </description> +</property> + +<property> <name>dfs.client.mmap.cache.size</name> - <value>1024</value> + <value>256</value> <description> When zero-copy reads are used, the DFSClient keeps a cache of recently used memory mapped regions. This parameter controls the maximum number of entries that we will keep in that cache. - If this is set to 0, we will not allow mmap. - The larger this number is, the more file descriptors we will potentially use for memory-mapped files. mmaped files also use virtual address space. 
You may need to increase your ulimit virtual address space limits before increasing the client mmap cache size. + + Note that you can still do zero-copy reads when this size is set to 0. </description> </property> <property> <name>dfs.client.mmap.cache.timeout.ms</name> - <value>900000</value> + <value>3600000</value> <description> The minimum length of time that we will keep an mmap entry in the cache between uses. If an entry is in the cache longer than this, and nobody @@ -1558,7 +1566,7 @@ <property> <name>dfs.client.short.circuit.replica.stale.threshold.ms</name> - <value>3000000</value> + <value>1800000</value> <description> The maximum amount of time that we will consider a short-circuit replica to be valid, if there is no communication from the DataNode. After this time Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java?rev=1573888&r1=1573887&r2=1573888&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java Tue Mar 4 06:51:40 2014 @@ -21,6 +21,8 @@ import static org.apache.hadoop.hdfs.DFS import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE; import java.io.File; import java.io.FileInputStream; @@ -698,4 +700,63 @@ public class TestEnhancedByteBufferAcces } }, 10, 60000); } + + @Test + public void testClientMmapDisable() throws Exception { + HdfsConfiguration conf = initZeroCopyTest(); + conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, false); + MiniDFSCluster cluster = null; + final Path TEST_PATH = new Path("/a"); + final int TEST_FILE_LENGTH = 16385; + final int RANDOM_SEED = 23453; + final String CONTEXT = "testClientMmapDisable"; + FSDataInputStream fsIn = null; + DistributedFileSystem fs = null; + conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT); + + try { + // With DFS_CLIENT_MMAP_ENABLED set to false, we should not do memory + // mapped reads. + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + DFSTestUtil.createFile(fs, TEST_PATH, + TEST_FILE_LENGTH, (short)1, RANDOM_SEED); + DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1); + fsIn = fs.open(TEST_PATH); + try { + fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS)); + Assert.fail("expected zero-copy read to fail when client mmaps " + + "were disabled."); + } catch (UnsupportedOperationException e) { + } + } finally { + if (fsIn != null) fsIn.close(); + if (fs != null) fs.close(); + if (cluster != null) cluster.shutdown(); + } + + fsIn = null; + fs = null; + cluster = null; + try { + // Now try again with DFS_CLIENT_MMAP_CACHE_SIZE == 0. It should work. 
+ conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, true); + conf.setInt(DFS_CLIENT_MMAP_CACHE_SIZE, 0); + conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT + ".1"); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + DFSTestUtil.createFile(fs, TEST_PATH, + TEST_FILE_LENGTH, (short)1, RANDOM_SEED); + DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1); + fsIn = fs.open(TEST_PATH); + ByteBuffer buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS)); + fsIn.releaseBuffer(buf); + } finally { + if (fsIn != null) fsIn.close(); + if (fs != null) fs.close(); + if (cluster != null) cluster.shutdown(); + } + } }