Author: suresh
Date: Thu Oct 18 00:12:50 2012
New Revision: 1399488

URL: http://svn.apache.org/viewvc?rev=1399488&view=rev
Log:
HDFS-4065. TestDFSShell.testGet sporadically fails attempting to corrupt block 
files due to race condition. Contributed by Chris Nauroth.

Modified:
    hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
    
hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/TestDFSShell.java

Modified: hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt?rev=1399488&r1=1399487&r2=1399488&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt (original)
+++ hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt Thu Oct 18 00:12:50 2012
@@ -169,3 +169,6 @@ Branch-hadoop-1-win - unreleased
 
     HADOOP-8935. Make 'winutils ls' show the SID if the owner does not 
     exist on the system. (Chuan Liu via suresh)
+
+    HDFS-4065. TestDFSShell.testGet sporadically fails attempting to corrupt 
+    block files due to race condition. (Chris Nauroth via suresh)

Modified: hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/TestDFSShell.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/TestDFSShell.java?rev=1399488&r1=1399487&r2=1399488&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/TestDFSShell.java (original)
+++ hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/hdfs/TestDFSShell.java Thu Oct 18 00:12:50 2012
@@ -1185,65 +1185,87 @@ public class TestDFSShell extends TestCa
   
   public void testGet() throws IOException {
     DFSTestUtil.setLogLevel2All(FSInputChecker.LOG);
+
+    final String fname = "testGet.txt";
+    Path root = new Path("/test/get");
+    final Path remotef = new Path(root, fname);
     final Configuration conf = new Configuration();
-    // Race can happen here: block scanner is reading the file when test tries
-    // to corrupt the test file, which will fail the test on Windows platform.
-    // Disable block scanner to avoid this race.
-    conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
-  
-    MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
-    DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem();
+
+    TestGetRunner runner = new TestGetRunner() {
+       private int count = 0;
+       private FsShell shell = new FsShell(conf);
+
+       public String run(int exitcode, String... options) throws IOException {
+         String dst = TEST_ROOT_DIR + "/" + fname+ ++count;
+         String[] args = new String[options.length + 3];
+         args[0] = "-get"; 
+         args[args.length - 2] = remotef.toString();
+         args[args.length - 1] = dst;
+         for(int i = 0; i < options.length; i++) {
+           args[i + 1] = options[i];
+         }
+         show("args=" + Arrays.asList(args));
+
+         try {
+           assertEquals(exitcode, shell.run(args));
+         } catch (Exception e) {
+           assertTrue(StringUtils.stringifyException(e), false); 
+         }
+         return exitcode == 0? DFSTestUtil.readFile(new File(dst)): null; 
+       }
+    };
+
+    File localf = createLocalFile(new File(TEST_ROOT_DIR, fname));
+    MiniDFSCluster cluster = null;
+    DistributedFileSystem dfs = null;
 
     try {
-      final String fname = "testGet.txt";
-      final File localf = createLocalFile(new File(TEST_ROOT_DIR, fname));
-      final String localfcontent = DFSTestUtil.readFile(localf);
-      final Path root = mkdir(dfs, new Path("/test/get"));
-      final Path remotef = new Path(root, fname);
-      dfs.copyFromLocalFile(false, false, new Path(localf.getPath()), remotef);
+      cluster = new MiniDFSCluster(conf, 2, true, null);
+      dfs = (DistributedFileSystem)cluster.getFileSystem();
 
-      final FsShell shell = new FsShell();
-      shell.setConf(conf);
-      TestGetRunner runner = new TestGetRunner() {
-        private int count = 0;
-
-        public String run(int exitcode, String... options) throws IOException {
-          String dst = TEST_ROOT_DIR + "/" + fname+ ++count;
-          String[] args = new String[options.length + 3];
-          args[0] = "-get"; 
-          args[args.length - 2] = remotef.toString();
-          args[args.length - 1] = dst;
-          for(int i = 0; i < options.length; i++) {
-            args[i + 1] = options[i];
-          }
-          show("args=" + Arrays.asList(args));
-          
-          try {
-            assertEquals(exitcode, shell.run(args));
-          } catch (Exception e) {
-            assertTrue(StringUtils.stringifyException(e), false); 
-          }
-          return exitcode == 0? DFSTestUtil.readFile(new File(dst)): null; 
-        }
-      };
+      mkdir(dfs, root);
+      dfs.copyFromLocalFile(false, false, new Path(localf.getPath()), remotef);
+      String localfcontent = DFSTestUtil.readFile(localf);
 
       assertEquals(localfcontent, runner.run(0));
       assertEquals(localfcontent, runner.run(0, "-ignoreCrc"));
 
-      //find and modify the block files
+      // find block files to modify later
       List<File> files = getBlockFiles(cluster);
+
+      // Shut down cluster and then corrupt the block files by overwriting a
+      // portion with junk data.  We must shut down the cluster so that threads
+      // in the data node do not hold locks on the block files while we try to
+      // write into them.  Particularly on Windows, the data node's use of the
+      FileChannel.transferTo method can cause block files to be memory mapped
+      // in read-only mode during the transfer to a client, and this causes a
+      // locking conflict.  The call to shutdown the cluster blocks until all
+      // DataXceiver threads exit, preventing this problem.
+      dfs.close();
+      cluster.shutdown();
+
       show("files=" + files);
       corrupt(files);
 
+      // Start the cluster again, but do not reformat, so prior files remain.
+      cluster = new MiniDFSCluster(conf, 2, false, null);
+      dfs = (DistributedFileSystem)cluster.getFileSystem();
+
       assertEquals(null, runner.run(-1));
       String corruptedcontent = runner.run(0, "-ignoreCrc");
       assertEquals(localfcontent.substring(1), corruptedcontent.substring(1));
       assertEquals(localfcontent.charAt(0)+1, corruptedcontent.charAt(0));
-
-      localf.delete();
     } finally {
-      try {dfs.close();} catch (Exception e) {}
-      cluster.shutdown();
+      if (null != dfs) {
+        try {
+          dfs.close();
+        } catch (Exception e) {
+        }
+      }
+      if (null != cluster) {
+        cluster.shutdown();
+      }
+      localf.delete();
     }
   }
 


Reply via email to