This is an automated email from the ASF dual-hosted git repository.

hexiaoqiao pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
     new 053afb7063f  HDFS-17704. Fix TestDecommission and TestDecommissionWithBackoffMonitor often run timeout. (#7266). Contributed by hfutatzhanghb.

053afb7063f is described below

commit 053afb7063febe7fcef2aa147d6eceaf8d215486
Author: hfutatzhanghb <hfutzhan...@163.com>
AuthorDate: Fri Jan 24 13:50:00 2025 +0800

    HDFS-17704. Fix TestDecommission and TestDecommissionWithBackoffMonitor often run timeout. (#7266). Contributed by hfutatzhanghb.

    Signed-off-by: He Xiaoqiao <hexiaoq...@apache.org>
---
 .../apache/hadoop/hdfs/AdminStatesBaseTest.java    |  4 ++
 .../org/apache/hadoop/hdfs/TestDecommission.java   | 51 +++++++++++-----------
 .../apache/hadoop/hdfs/TestMaintenanceState.java   | 50 ++++++++++-----------
 3 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java
index bf25958b69d..0046da688d4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java
@@ -28,10 +28,12 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.util.Lists;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
@@ -69,6 +71,8 @@ public class AdminStatesBaseTest {
 
   @Rule
   public TemporaryFolder baseDir = new TemporaryFolder();
+  @Rule
+  public Timeout timeout = new Timeout(600, TimeUnit.SECONDS);
 
   private HostsFileWriter hostsFileWriter;
   private Configuration conf;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 0133d3aec37..93cec715ed5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -188,7 +188,7 @@ private void verifyStats(NameNode namenode, FSNamesystem fsn,
   /**
    * Tests decommission for non federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommission() throws IOException {
     testDecommission(1, 6);
   }
@@ -198,7 +198,7 @@ public void testDecommission() throws IOException {
    * to other datanodes and satisfy the replication factor. Make sure the
    * datanode won't get stuck in decommissioning state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testDecommission2() throws IOException {
     LOG.info("Starting test testDecommission");
     int numNamenodes = 1;
@@ -247,7 +247,7 @@ public void testDecommission2() throws IOException {
   /**
    * Test decommission for federeated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionFederation() throws IOException {
     testDecommission(2, 2);
   }
@@ -262,7 +262,7 @@ public void testDecommissionFederation() throws IOException {
    * That creates inconsistent state and prevent SBN from finishing
    * decommission.
   */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionOnStandby() throws Exception {
     getConf().setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
     getConf().setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
@@ -435,7 +435,7 @@ private void testDecommission(int numNamenodes, int numDatanodes)
   /**
    * Test that over-replicated blocks are deleted on recommission.
    */
-  @Test(timeout=120000)
+  @Test
   public void testRecommission() throws Exception {
     final int numDatanodes = 6;
     try {
@@ -516,7 +516,7 @@ public Boolean get() {
    * Tests cluster storage statistics during decommissioning for non
    * federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testClusterStats() throws Exception {
     testClusterStats(1);
   }
@@ -525,7 +525,7 @@ public void testClusterStats() throws Exception {
    * Tests cluster storage statistics during decommissioning for
    * federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testClusterStatsFederation() throws Exception {
     testClusterStats(3);
   }
@@ -575,7 +575,7 @@ private DataNode getDataNode(DatanodeInfo decomInfo) {
    * in the include file are allowed to connect to the namenode in a non
    * federated cluster.
    */
-  @Test(timeout=360000)
+  @Test
   public void testHostsFile() throws IOException, InterruptedException {
     // Test for a single namenode cluster
     testHostsFile(1);
@@ -586,7 +586,7 @@ public void testHostsFile() throws IOException, InterruptedException {
    * in the include file are allowed to connect to the namenode in a
    * federated cluster.
    */
-  @Test(timeout=360000)
+  @Test
   public void testHostsFileFederation()
       throws IOException, InterruptedException {
     // Test for 3 namenode federated cluster
@@ -624,7 +624,7 @@ public void testHostsFile(int numNameNodes) throws IOException,
     }
   }
 
-  @Test(timeout=120000)
+  @Test
   public void testDecommissionWithOpenfile()
       throws IOException, InterruptedException {
     LOG.info("Starting test testDecommissionWithOpenfile");
@@ -676,7 +676,7 @@ public void testDecommissionWithOpenfile()
     fdos.close();
   }
 
-  @Test(timeout = 20000)
+  @Test
   public void testDecommissionWithUnknownBlock() throws IOException {
     startCluster(1, 3);
 
@@ -795,7 +795,7 @@ public Boolean get() {
     }
   }
 
-  @Test(timeout=180000)
+  @Test
   public void testDecommissionWithOpenfileReporting()
       throws Exception {
     LOG.info("Starting test testDecommissionWithOpenfileReporting");
@@ -901,7 +901,7 @@ public void run() {
    * 2. close file with decommissioning
    * @throws Exception
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionWithCloseFileAndListOpenFiles()
       throws Exception {
     LOG.info("Starting test testDecommissionWithCloseFileAndListOpenFiles");
@@ -958,7 +958,7 @@ public void testDecommissionWithCloseFileAndListOpenFiles()
     fileSys.delete(file, false);
   }
 
-  @Test(timeout = 360000)
+  @Test
   public void testDecommissionWithOpenFileAndBlockRecovery()
       throws IOException, InterruptedException {
     startCluster(1, 6);
@@ -1005,7 +1005,7 @@ public void testDecommissionWithOpenFileAndBlockRecovery()
     assertEquals(dfs.getFileStatus(file).getLen(), writtenBytes);
   }
 
-  @Test(timeout=120000)
+  @Test
   public void testCloseWhileDecommission() throws IOException,
       ExecutionException, InterruptedException {
     LOG.info("Starting test testCloseWhileDecommission");
@@ -1064,7 +1064,7 @@ public void testCloseWhileDecommission() throws IOException,
    * to the IBR, all three nodes dn1/dn2/dn3 enter Decommissioning and then the
    * DN reports the IBR.
   */
-  @Test(timeout=120000)
+  @Test
   public void testAllocAndIBRWhileDecommission() throws IOException {
     LOG.info("Starting test testAllocAndIBRWhileDecommission");
     getConf().setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
@@ -1149,7 +1149,7 @@ public void testAllocAndIBRWhileDecommission() throws IOException {
   /**
    * Tests restart of namenode while datanode hosts are added to exclude file
    **/
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionWithNamenodeRestart()
       throws IOException, InterruptedException {
     LOG.info("Starting test testDecommissionWithNamenodeRestart");
@@ -1201,7 +1201,7 @@ public void testDecommissionWithNamenodeRestart()
   /**
    * Tests dead node count after restart of namenode
    **/
-  @Test(timeout=360000)
+  @Test
   public void testDeadNodeCountAfterNamenodeRestart()throws Exception {
     LOG.info("Starting test testDeadNodeCountAfterNamenodeRestart");
     int numNamenodes = 1;
@@ -1248,7 +1248,7 @@ public void testDeadNodeCountAfterNamenodeRestart()throws Exception {
    * valid DNS hostname for the DataNode. See HDFS-5237 for background.
    */
   @Ignore
-  @Test(timeout=360000)
+  @Test
   public void testIncludeByRegistrationName() throws Exception {
     // Any IPv4 address starting with 127 functions as a "loopback" address
     // which is connected to the current host. So by choosing 127.0.0.100
@@ -1314,7 +1314,7 @@ public Boolean get() {
     }, 500, 5000);
   }
 
-  @Test(timeout=120000)
+  @Test
   public void testBlocksPerInterval() throws Exception {
     GenericTestUtils.setLogLevel(
         LoggerFactory.getLogger(DatanodeAdminManager.class), Level.TRACE);
@@ -1369,7 +1369,7 @@ private void doDecomCheck(DatanodeManager datanodeManager,
   /**
    * Test DatanodeAdminManager#monitor can swallow any exceptions by default.
    */
-  @Test(timeout=120000)
+  @Test
   public void testPendingNodeButDecommissioned() throws Exception {
     // Only allow one node to be decom'd at a time
     getConf().setInt(
@@ -1416,7 +1416,7 @@ public void testPendingNodeButDecommissioned() throws Exception {
     }
   }
 
-  @Test(timeout=120000)
+  @Test
   public void testPendingNodes() throws Exception {
     GenericTestUtils.setLogLevel(
         LoggerFactory.getLogger(DatanodeAdminManager.class), Level.TRACE);
@@ -1639,7 +1639,7 @@ public void testUsedCapacity() throws Exception {
   /**
    * Verify if multiple DataNodes can be decommission at the same time.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testMultipleNodesDecommission() throws Exception {
     startCluster(1, 5);
     final Path file = new Path("/testMultipleNodesDecommission.dat");
@@ -1685,7 +1685,7 @@ public Boolean get() {
    * Force the tracked nodes set to be filled with nodes lost while decommissioning,
    * then decommission healthy nodes & validate they are decommissioned eventually.
    */
-  @Test(timeout = 120000)
+  @Test
   public void testRequeueUnhealthyDecommissioningNodes() throws Exception {
     // Create a MiniDFSCluster with 3 live datanode in AdminState=NORMAL and
     // 2 dead datanodes in AdminState=DECOMMISSION_INPROGRESS and a file
@@ -1911,7 +1911,8 @@ private void createClusterWithDeadNodesDecommissionInProgress(final int numLiveN
      under-replicated block can be replicated to sufficient datanodes &
      the decommissioning node can be decommissioned.
   */
-  @Test(timeout = 60000)
+  @SuppressWarnings("checkstyle:methodlength")
+  @Test
   public void testDeleteCorruptReplicaForUnderReplicatedBlock() throws Exception {
     // Constants
     final Path file = new Path("/test-file");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
index 5d2365a349c..3a7b560499b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
@@ -87,7 +87,7 @@ void setMinMaintenanceR(int minMaintenanceR) {
   /**
    * Test valid value range for the config namenode.maintenance.replication.min.
    */
-  @Test (timeout = 60000)
+  @Test
   public void testMaintenanceMinReplConfigRange() {
     LOG.info("Setting testMaintenanceMinReplConfigRange");
 
@@ -120,7 +120,7 @@ public void testMaintenanceMinReplConfigRange() {
    * Verify a node can transition from AdminStates.ENTERING_MAINTENANCE to
    * AdminStates.NORMAL.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTakeNodeOutOfEnteringMaintenance() throws Exception {
     LOG.info("Starting testTakeNodeOutOfEnteringMaintenance");
     final int replicas = 1;
@@ -149,7 +149,7 @@ public void testTakeNodeOutOfEnteringMaintenance() throws Exception {
    * Verify a AdminStates.ENTERING_MAINTENANCE node can expire and transition
    * to AdminStates.NORMAL upon timeout.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testEnteringMaintenanceExpiration() throws Exception {
     LOG.info("Starting testEnteringMaintenanceExpiration");
     final int replicas = 1;
@@ -173,7 +173,7 @@ public void testEnteringMaintenanceExpiration() throws Exception {
   /**
    * Verify node stays in AdminStates.NORMAL with invalid expiration.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testInvalidExpiration() throws Exception {
     LOG.info("Starting testInvalidExpiration");
     final int replicas = 1;
@@ -195,7 +195,7 @@ public void testInvalidExpiration() throws Exception {
    * When a dead node is put to maintenance, it transitions directly to
    * AdminStates.IN_MAINTENANCE.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testPutDeadNodeToMaintenance() throws Exception {
     LOG.info("Starting testPutDeadNodeToMaintenance");
     final int replicas = 1;
@@ -229,7 +229,7 @@ public void testPutDeadNodeToMaintenance() throws Exception {
    * AdminStates.IN_MAINTENANCE. Then AdminStates.IN_MAINTENANCE expires and
    * transitions to AdminStates.NORMAL.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testPutDeadNodeToMaintenanceWithExpiration() throws Exception {
     LOG.info("Starting testPutDeadNodeToMaintenanceWithExpiration");
     final Path file =
@@ -266,7 +266,7 @@ public void testPutDeadNodeToMaintenanceWithExpiration() throws Exception {
   /**
    * Transition from decommissioned state to maintenance state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionFromDecommissioned() throws IOException {
     LOG.info("Starting testTransitionFromDecommissioned");
     final Path file = new Path("/testTransitionFromDecommissioned.dat");
@@ -289,7 +289,7 @@ public void testTransitionFromDecommissioned() throws IOException {
    * Transition from decommissioned state to maintenance state.
    * After the maintenance state expires, it is transitioned to NORMAL.
   */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionFromDecommissionedAndExpired() throws IOException {
     LOG.info("Starting testTransitionFromDecommissionedAndExpired");
     final Path file =
@@ -320,7 +320,7 @@ public void testTransitionFromDecommissionedAndExpired() throws IOException {
    * If node becomes dead when it is in AdminStates.ENTERING_MAINTENANCE, it
    * should stay in AdminStates.ENTERING_MAINTENANCE state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testNodeDeadWhenInEnteringMaintenance() throws Exception {
     LOG.info("Starting testNodeDeadWhenInEnteringMaintenance");
     final int numNamenodes = 1;
@@ -366,7 +366,7 @@ public void testNodeDeadWhenInEnteringMaintenance() throws Exception {
    * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY and
    * its file's replication factor into account.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testExpectedReplications() throws IOException {
     LOG.info("Starting testExpectedReplications");
     testExpectedReplication(1);
@@ -411,7 +411,7 @@ private void testExpectedReplication(int replicationFactor,
    * Verify a node can transition directly to AdminStates.IN_MAINTENANCE when
    * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY is set to zero.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testZeroMinMaintenanceReplication() throws Exception {
     LOG.info("Starting testZeroMinMaintenanceReplication");
     setMinMaintenanceR(0);
@@ -434,7 +434,7 @@ public void testZeroMinMaintenanceReplication() throws Exception {
    * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY is set to zero. Then later
    * transition to NORMAL after maintenance expiration.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testZeroMinMaintenanceReplicationWithExpiration()
       throws Exception {
     LOG.info("Starting testZeroMinMaintenanceReplicationWithExpiration");
@@ -460,7 +460,7 @@ public void testZeroMinMaintenanceReplicationWithExpiration()
   /**
    * Test file block replication lesser than maintenance minimum.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testFileBlockReplicationAffectingMaintenance()
       throws Exception {
     int defaultReplication = getConf().getInt(DFSConfigKeys
@@ -537,7 +537,7 @@ private void testFileBlockReplicationImpl(
   /**
    * Transition from IN_MAINTENANCE to DECOMMISSIONED.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionToDecommission() throws IOException {
     LOG.info("Starting testTransitionToDecommission");
     final int numNamenodes = 1;
@@ -581,7 +581,7 @@ public void testTransitionToDecommission() throws IOException {
   /**
    * Transition from decommissioning state to maintenance state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionFromDecommissioning() throws IOException {
     LOG.info("Starting testTransitionFromDecommissioning");
     startCluster(1, 3);
@@ -612,7 +612,7 @@ public void testTransitionFromDecommissioning() throws IOException {
    * in decommission. Make sure decommission process take
    * maintenance replica into account.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testDecommissionDifferentNodeAfterMaintenances()
       throws Exception {
     testDecommissionDifferentNodeAfterMaintenance(2);
@@ -657,7 +657,7 @@ private void testDecommissionDifferentNodeAfterMaintenance(int repl)
    * Verify if multiple DataNodes can transition to maintenance state
    * at the same time.
   */
-  @Test(timeout = 360000)
+  @Test
   public void testMultipleNodesMaintenance() throws Exception {
     startCluster(1, 5);
     final Path file = new Path("/testMultipleNodesMaintenance.dat");
@@ -691,7 +691,7 @@ public void testMultipleNodesMaintenance() throws Exception {
     cleanupFile(fileSys, file);
   }
 
-  @Test(timeout = 360000)
+  @Test
   public void testChangeReplicationFactors() throws IOException {
     // Prior to any change, there is 1 maintenance node and 2 live nodes.
 
@@ -762,7 +762,7 @@ private void testChangeReplicationFactor(int oldFactor, int newFactor,
    * c. Take the node out of maintenance => NN should schedule the replication
    *    and end up with 3 live.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTakeDeadNodeOutOfMaintenance() throws Exception {
     LOG.info("Starting testTakeDeadNodeOutOfMaintenance");
     final int numNamenodes = 1;
@@ -818,7 +818,7 @@ public void testTakeDeadNodeOutOfMaintenance() throws Exception {
    * d. Restart the maintenance dn => 1 maintenance, 3 live.
    * e. Take the node out of maintenance => over replication => 3 live.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testWithNNAndDNRestart() throws Exception {
     LOG.info("Starting testWithNNAndDNRestart");
     final int numNamenodes = 1;
@@ -876,7 +876,7 @@ public void testWithNNAndDNRestart() throws Exception {
   /**
    * Machine under maintenance state won't be chosen for new block allocation.
    */
-  @Test(timeout = 3600000)
+  @Test
   public void testWriteAfterMaintenance() throws IOException {
     LOG.info("Starting testWriteAfterMaintenance");
     startCluster(1, 3);
@@ -908,7 +908,7 @@ public void testWriteAfterMaintenance() throws IOException {
    * Given there are minReplication replicas somewhere else,
    * it can be transitioned to AdminStates.IN_MAINTENANCE.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testEnterMaintenanceWhenFileOpen() throws Exception {
     LOG.info("Starting testEnterMaintenanceWhenFileOpen");
     startCluster(1, 3);
@@ -927,7 +927,7 @@ public void testEnterMaintenanceWhenFileOpen() throws Exception {
   /**
    * Machine under maintenance state won't be chosen for invalidation.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testInvalidation() throws IOException {
     LOG.info("Starting testInvalidation");
     int numNamenodes = 1;
@@ -959,7 +959,7 @@ public void testInvalidation() throws IOException {
     cleanupFile(fileSys, file);
   }
 
-  @Test(timeout = 120000)
+  @Test
   public void testFileCloseAfterEnteringMaintenance() throws Exception {
     LOG.info("Starting testFileCloseAfterEnteringMaintenance");
     int expirationInMs = 30 * 1000;
@@ -1138,7 +1138,7 @@ static private DatanodeInfo[] getFirstBlockReplicasDatanodeInfos(
     }
   }
 
-  @Test(timeout = 120000)
+  @Test
   public void testReportMaintenanceNodes() throws Exception {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     ByteArrayOutputStream err = new ByteArrayOutputStream();

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org
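
P.S. For readers outside the Hadoop test code: the pattern this patch applies is JUnit 4's class-level
Timeout rule, which replaces the removed per-method @Test(timeout=...) values with a single per-test
limit that is also inherited by subclasses of the base test class. A minimal standalone sketch of that
pattern follows; the class and method names here are illustrative only (not from the patch), while
org.junit.rules.Timeout and its (long, TimeUnit) constructor are standard JUnit 4.12+ API.

    import java.util.concurrent.TimeUnit;

    import org.junit.Rule;
    import org.junit.Test;
    import org.junit.rules.Timeout;

    public class TimeoutRuleSketch {

      // A @Rule is applied to each @Test method individually, so every test
      // in this class (and in subclasses, when the rule lives in a shared
      // base class) gets its own 600-second budget.
      @Rule
      public Timeout timeout = new Timeout(600, TimeUnit.SECONDS);

      @Test
      public void testRunsUnderTheSharedLimit() throws Exception {
        Thread.sleep(100); // the rule fails the test if it exceeds 600s
      }
    }

One budget in one place is easier to tune for slow CI hosts than dozens of hand-picked per-method values.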