Author: mattf
Date: Mon Nov 19 07:19:46 2012
New Revision: 1411087

URL: http://svn.apache.org/viewvc?rev=1411087&view=rev
Log:
merged r1372708 from branch-1:
HDFS-3658. Fix bugs in TestDFSClientRetries and add more tests. Contributed by Tsz Wo Sze.

Modified: hadoop/common/branches/branch-1.1/CHANGES.txt hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java Modified: hadoop/common/branches/branch-1.1/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/CHANGES.txt?rev=1411087&r1=1411086&r2=1411087&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/CHANGES.txt (original) +++ hadoop/common/branches/branch-1.1/CHANGES.txt Mon Nov 19 07:19:46 2012 @@ -59,6 +59,8 @@ Release 1.1.1 - 2012.11.18 MAPREDUCE-4792. Unit Test TestJobTrackerRestartWithLostTracker fails with ant-1.8.4. (Amir Sanjar via mattf) + HDFS-3658. Fix bugs in TestDFSClientRetries and add more tests. (szetszwo) + Release 1.1.0 - 2012.09.28 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1411087&r1=1411086&r2=1411087&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Nov 19 07:19:46 2012 @@ -1610,6 +1610,9 @@ public class FSNamesystem implements FSC +src+" for "+clientName); synchronized (this) { + if (isInSafeMode()) {//check safemode first for failing-fast + throw new SafeModeException("Cannot add block to " + src, safeMode); + } // have we exceeded the configured limit of fs objects. 
checkFsObjectLimit(); @@ -1627,7 +1630,7 @@ public class FSNamesystem implements FSC replication = (int)pendingFile.getReplication(); } - // choose targets for the new block tobe allocated. + // choose targets for the new block to be allocated. DatanodeDescriptor targets[] = replicator.chooseTarget(replication, clientNode, excludedNodes, @@ -1640,7 +1643,7 @@ public class FSNamesystem implements FSC // Allocate a new block and record it in the INode. synchronized (this) { - if (isInSafeMode()) { + if (isInSafeMode()) { //make sure it is not in safemode again. throw new SafeModeException("Cannot add block to " + src, safeMode); } INode[] pathINodes = dir.getExistingPathINodes(src); @@ -4879,6 +4882,10 @@ public class FSNamesystem implements FSC this.safeReplication = conf.getInt("dfs.replication.min", 1); this.blockTotal = 0; this.blockSafe = 0; + + LOG.info("dfs.safemode.threshold.pct = " + threshold); + LOG.info("dfs.namenode.safemode.min.datanodes = " + datanodeThreshold); + LOG.info("dfs.safemode.extension = " + extension); } /** Modified: hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1411087&r1=1411086&r2=1411087&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java (original) +++ hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java Mon Nov 19 07:19:46 2012 @@ -252,7 +252,8 @@ public class MiniDFSCluster { int replication = conf.getInt("dfs.replication", 3); conf.setInt("dfs.replication", Math.min(replication, numDataNodes)); - conf.setInt("dfs.safemode.extension", 0); + int safemodeExtension = conf.getInt("dfs.safemode.extension.testing", 0); + conf.setInt("dfs.safemode.extension", safemodeExtension); 
conf.setInt("dfs.namenode.decommission.interval", 3); // 3 second // Set a small delay on blockReceived in the minicluster to approximate Modified: hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java?rev=1411087&r1=1411086&r2=1411087&view=diff ============================================================================== --- hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java (original) +++ hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java Mon Nov 19 07:19:46 2012 @@ -31,6 +31,7 @@ import java.net.SocketTimeoutException; import java.net.URI; import java.util.ArrayList; import java.util.List; +import java.util.Random; import java.util.concurrent.TimeUnit; import junit.framework.Assert; @@ -42,6 +43,7 @@ import org.apache.commons.logging.impl.L import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -78,6 +80,7 @@ import org.apache.hadoop.security.Access import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.StringUtils; import org.apache.log4j.Level; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -544,6 +547,8 @@ public class TestDFSClientRetries extend final Path dir = new Path("/testNamenodeRestart"); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY, true); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1); + 
conf.setInt("dfs.safemode.extension.testing", 5000); final short numDatanodes = 3; final MiniDFSCluster cluster = new MiniDFSCluster( @@ -565,11 +570,38 @@ public class TestDFSClientRetries extend final FileStatus s1 = fs.getFileStatus(file1); assertEquals(length, s1.getLen()); + //create file4, write some data but not close + final Path file4 = new Path(dir, "file4"); + final FSDataOutputStream out4 = fs.create(file4, false, 4096, + fs.getDefaultReplication(file4), 1024L, null); + final byte[] bytes = new byte[1000]; + new Random().nextBytes(bytes); + out4.write(bytes); + out4.write(bytes); + out4.sync(); + //shutdown namenode assertTrue(DistributedFileSystem.isHealthy(uri)); cluster.shutdownNameNode(); assertFalse(DistributedFileSystem.isHealthy(uri)); + //namenode is down, continue writing file4 in a thread + final Thread file4thread = new Thread(new Runnable() { + @Override + public void run() { + try { + //write some more data and then close the file + out4.write(bytes); + out4.write(bytes); + out4.write(bytes); + out4.close(); + } catch (Exception e) { + exceptions.add(e); + } + } + }); + file4thread.start(); + //namenode is down, read the file in a thread final Thread reader = new Thread(new Runnable() { @Override @@ -628,10 +660,26 @@ public class TestDFSClientRetries extend //check file1 and file3 thread.join(); + assertEmpty(exceptions); assertEquals(s1.getLen(), fs.getFileStatus(file3).getLen()); assertEquals(fs.getFileChecksum(file1), fs.getFileChecksum(file3)); reader.join(); + assertEmpty(exceptions); + + //check file4 + file4thread.join(); + assertEmpty(exceptions); + { + final FSDataInputStream in = fs.open(file4); + int count = 0; + for(int r; (r = in.read()) != -1; count++) { + Assert.assertEquals(String.format("count=%d", count), + bytes[count % bytes.length], (byte)r); + } + Assert.assertEquals(5 * bytes.length, count); + in.close(); + } //enter safe mode assertTrue(DistributedFileSystem.isHealthy(uri)); @@ -671,19 +719,28 @@ public class 
TestDFSClientRetries extend LOG.info("GOOD!", fnfe); } - if (!exceptions.isEmpty()) { - LOG.error("There are " + exceptions.size() + " exception(s):"); - for(int i = 0; i < exceptions.size(); i++) { - LOG.error("Exception " + i, exceptions.get(i)); - } - fail(); - } + assertEmpty(exceptions); } finally { cluster.shutdown(); } } - public static FileSystem createFsWithDifferentUsername( + static void assertEmpty(final List<Exception> exceptions) { + if (!exceptions.isEmpty()) { + final StringBuilder b = new StringBuilder("There are ") + .append(exceptions.size()) + .append(" exception(s):"); + for(int i = 0; i < exceptions.size(); i++) { + b.append("\n Exception ") + .append(i) + .append(": ") + .append(StringUtils.stringifyException(exceptions.get(i))); + } + fail(b.toString()); + } + } + + private static FileSystem createFsWithDifferentUsername( final Configuration conf, final boolean isWebHDFS ) throws IOException, InterruptedException { String username = UserGroupInformation.getCurrentUser().getShortUserName()+"_XXX";