Author: kihwal Date: Fri Apr 19 14:08:29 2013 New Revision: 1469839 URL: http://svn.apache.org/r1469839 Log: HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails sporadically. Contributed by Chris Nauroth.
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1469839&r1=1469838&r2=1469839&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Apr 19 14:08:29 2013 @@ -2560,6 +2560,9 @@ Release 0.23.8 - UNRELEASED HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal) + HDFS-4699. TestPipelinesFailover#testPipelineRecoveryStress fails + sporadically (Chris Nauroth via kihwal) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=1469839&r1=1469838&r2=1469839&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java Fri Apr 19 14:08:29 2013 @@ -1286,7 +1286,10 @@ public class DataNode extends Configured LOG.warn("checkDiskError: exception: ", e); if (e instanceof SocketException || e instanceof SocketTimeoutException || e instanceof ClosedByInterruptException - || e.getMessage().startsWith("Broken pipe")) { + || e.getMessage().startsWith("An established connection was aborted") + || e.getMessage().startsWith("Broken pipe") + || e.getMessage().startsWith("Connection reset") + || e.getMessage().contains("java.nio.channels.SocketChannel")) { LOG.info("Not checking disk as checkDiskError was called on a network" + " related exception"); return; Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java?rev=1469839&r1=1469838&r2=1469839&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java (original) +++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java Fri Apr 19 14:08:29 2013 @@ -422,6 +422,11 @@ public class TestPipelinesFailover { // Disable permissions so that another user can recover the lease. harness.conf.setBoolean( DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + // This test triggers rapid NN failovers. The client retry policy uses an + // exponential backoff. This can quickly lead to long sleep times and even + // timeout the whole test. Cap the sleep time at 1s to prevent this. + harness.conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY, + 1000); final MiniDFSCluster cluster = harness.startCluster(); try { @@ -537,11 +542,10 @@ public class TestPipelinesFailover { } /** - * Try to cover the lease on the given file for up to 30 - * seconds. + * Try to recover the lease on the given file for up to 60 seconds. * @param fsOtherUser the filesystem to use for the recoverLease call * @param testPath the path on which to run lease recovery - * @throws TimeoutException if lease recover does not succeed within 30 + * @throws TimeoutException if lease recover does not succeed within 60 * seconds * @throws InterruptedException if the thread is interrupted */ @@ -564,7 +568,7 @@ public class TestPipelinesFailover { } return success; } - }, 1000, 30000); + }, 1000, 60000); } catch (TimeoutException e) { throw new TimeoutException("Timed out recovering lease for " + testPath);