Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java Thu Aug 7 07:38:23 2014
@@ -658,6 +658,71 @@ public class TestBalancer {
     oneNodeTest(conf, false);
   }
 
+  /* We first start a cluster and fill it up to a certain size.
+   * Then we redistribute blocks according to the required distribution.
+   * Then we start an empty datanode.
+   * Afterwards a balancer is run to balance the cluster.
+   * A partially filled datanode is excluded during balancing.
+   * This triggers a situation where one of the block's locations is unknown.
+   */
+  @Test(timeout=100000)
+  public void testUnknownDatanode() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    initConf(conf);
+    long distribution[] = new long[] {50*CAPACITY/100, 70*CAPACITY/100, 0*CAPACITY/100};
+    long capacities[] = new long[]{CAPACITY, CAPACITY, CAPACITY};
+    String racks[] = new String[] {RACK0, RACK1, RACK1};
+
+    int numDatanodes = distribution.length;
+    if (capacities.length != numDatanodes || racks.length != numDatanodes) {
+      throw new IllegalArgumentException("Array length is not the same");
+    }
+
+    // calculate the total space that needs to be filled
+    final long totalUsedSpace = sum(distribution);
+
+    // fill the cluster
+    ExtendedBlock[] blocks = generateBlocks(conf, totalUsedSpace,
+        (short) numDatanodes);
+
+    // redistribute blocks
+    Block[][] blocksDN = distributeBlocks(
+        blocks, (short)(numDatanodes-1), distribution);
+
+    // restart the cluster: do NOT format the cluster
+    conf.set(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "0.0f");
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
+        .format(false)
+        .racks(racks)
+        .simulatedCapacities(capacities)
+        .build();
+    try {
+      cluster.waitActive();
+      client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(),
+          ClientProtocol.class).getProxy();
+
+      for(int i = 0; i < 3; i++) {
+        cluster.injectBlocks(i, Arrays.asList(blocksDN[i]), null);
+      }
+
+      cluster.startDataNodes(conf, 1, true, null,
+          new String[]{RACK0}, null, new long[]{CAPACITY});
+      cluster.triggerHeartbeats();
+
+      Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
+      Set<String> datanodes = new HashSet<String>();
+      datanodes.add(cluster.getDataNodes().get(0).getDatanodeId().getHostName());
+      Balancer.Parameters p = new Balancer.Parameters(
+          Balancer.Parameters.DEFAULT.policy,
+          Balancer.Parameters.DEFAULT.threshold,
+          datanodes, Balancer.Parameters.DEFAULT.nodesToBeIncluded);
+      final int r = Balancer.run(namenodes, p, conf);
+      assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
   /**
    * Test parse method in Balancer#Cli class with threshold value out of
    * boundaries.
@@ -789,13 +854,42 @@ public class TestBalancer {
     } catch (IllegalArgumentException e) {
 
     }
-    parameters = new String[] { "-threshold 1 -policy" };
+    parameters = new String[] {"-threshold", "1", "-policy"};
+    try {
+      Balancer.Cli.parse(parameters);
+      fail(reason);
+    } catch (IllegalArgumentException e) {
+
+    }
+    parameters = new String[] {"-threshold", "1", "-include"};
     try {
       Balancer.Cli.parse(parameters);
       fail(reason);
     } catch (IllegalArgumentException e) {
 
     }
+    parameters = new String[] {"-threshold", "1", "-exclude"};
+    try {
+      Balancer.Cli.parse(parameters);
+      fail(reason);
+    } catch (IllegalArgumentException e) {
+
+    }
+    parameters = new String[] {"-include", "-f"};
+    try {
+      Balancer.Cli.parse(parameters);
+      fail(reason);
+    } catch (IllegalArgumentException e) {
+
+    }
+    parameters = new String[] {"-exclude", "-f"};
+    try {
+      Balancer.Cli.parse(parameters);
+      fail(reason);
+    } catch (IllegalArgumentException e) {
+
+    }
+    parameters = new String[] {"-include", "testnode1", "-exclude", "testnode2"};
     try {
       Balancer.Cli.parse(parameters);
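The -include/-exclude options exercised by the Cli.parse checks above feed the same Balancer.Parameters object used in testUnknownDatanode: each option needs an argument (an inline node list or, apparently, a hosts file via -f), and the two cannot be combined in a single invocation. Below is a minimal sketch of driving that API directly rather than through the CLI; it assumes the same package as the test (Parameters and ReturnStatus are accessed the way the test accesses them), and the hostname is a placeholder.

package org.apache.hadoop.hdfs.server.balancer;  // same package as the test; Parameters is constructed directly there

import java.net.URI;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class BalancerExcludeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();

    // The hostname below is a placeholder, not a node from this change.
    Set<String> excluded = new HashSet<String>();
    excluded.add("datanode1.example.com");

    // Keep the default policy and threshold and override only the exclude
    // set, mirroring the Parameters construction in testUnknownDatanode.
    Balancer.Parameters p = new Balancer.Parameters(
        Balancer.Parameters.DEFAULT.policy,
        Balancer.Parameters.DEFAULT.threshold,
        excluded, Balancer.Parameters.DEFAULT.nodesToBeIncluded);

    // Run against every namenode in the configuration, as the test does.
    Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
    int r = Balancer.run(namenodes, p, conf);
    if (r != Balancer.ReturnStatus.SUCCESS.code) {
      System.err.println("Balancer exited with status " + r);
    }
  }
}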
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java Thu Aug 7 07:38:23 2014
@@ -268,4 +268,14 @@ public class BlockManagerTestUtil {
     }
     return reports.toArray(StorageReport.EMPTY_ARRAY);
   }
+
+  /**
+   * Have DatanodeManager check decommission state.
+   * @param dm the DatanodeManager to manipulate
+   */
+  public static void checkDecommissionState(DatanodeManager dm,
+      DatanodeDescriptor node) {
+    dm.checkDecommissionState(node);
+  }
+
 }

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java Thu Aug 7 07:38:23 2014
@@ -368,7 +368,7 @@ public class TestBlockManager {
       DatanodeStorageInfo[] pipeline) throws IOException {
     for (int i = 1; i < pipeline.length; i++) {
       DatanodeStorageInfo storage = pipeline[i];
-      bm.addBlock(storage.getDatanodeDescriptor(), storage.getStorageID(), blockInfo, null);
+      bm.addBlock(storage, blockInfo, null);
       blockInfo.addStorage(storage);
     }
   }
@@ -549,12 +549,12 @@ public class TestBlockManager {
 
     // send block report, should be processed
     reset(node);
-    bm.processReport(node, new DatanodeStorage(ds.getStorageID()), "pool",
+    bm.processReport(node, new DatanodeStorage(ds.getStorageID()),
         new BlockListAsLongs(null, null));
     assertEquals(1, ds.getBlockReportCount());
     // send block report again, should NOT be processed
     reset(node);
-    bm.processReport(node, new DatanodeStorage(ds.getStorageID()), "pool",
+    bm.processReport(node, new DatanodeStorage(ds.getStorageID()),
         new BlockListAsLongs(null, null));
     assertEquals(1, ds.getBlockReportCount());
 
@@ -566,7 +566,7 @@ public class TestBlockManager {
     assertEquals(0, ds.getBlockReportCount()); // ready for report again
     // send block report, should be processed after restart
     reset(node);
-    bm.processReport(node, new DatanodeStorage(ds.getStorageID()), "pool",
+    bm.processReport(node, new DatanodeStorage(ds.getStorageID()),
         new BlockListAsLongs(null, null));
     assertEquals(1, ds.getBlockReportCount());
   }
@@ -595,7 +595,7 @@ public class TestBlockManager {
     // send block report while pretending to already have blocks
     reset(node);
     doReturn(1).when(node).numBlocks();
-    bm.processReport(node, new DatanodeStorage(ds.getStorageID()), "pool",
+    bm.processReport(node, new DatanodeStorage(ds.getStorageID()),
         new BlockListAsLongs(null, null));
     assertEquals(1, ds.getBlockReportCount());
   }

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java Thu Aug 7 07:38:23 2014
@@ -33,6 +33,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.server.blockmanagement.CorruptReplicasMap.Reason;
 
 import org.junit.Test;
 
@@ -89,14 +90,14 @@ public class TestCorruptReplicaInfo {
       DatanodeDescriptor dn1 = DFSTestUtil.getLocalDatanodeDescriptor();
       DatanodeDescriptor dn2 = DFSTestUtil.getLocalDatanodeDescriptor();
 
-      crm.addToCorruptReplicasMap(getBlock(0), dn1, "TEST");
+      addToCorruptReplicasMap(crm, getBlock(0), dn1);
       assertEquals("Number of corrupt blocks not returning correctly",
                    1, crm.size());
-      crm.addToCorruptReplicasMap(getBlock(1), dn1, "TEST");
+      addToCorruptReplicasMap(crm, getBlock(1), dn1);
       assertEquals("Number of corrupt blocks not returning correctly",
                    2, crm.size());
 
-      crm.addToCorruptReplicasMap(getBlock(1), dn2, "TEST");
+      addToCorruptReplicasMap(crm, getBlock(1), dn2);
       assertEquals("Number of corrupt blocks not returning correctly",
                    2, crm.size());
 
@@ -109,7 +110,7 @@ public class TestCorruptReplicaInfo {
                    0, crm.size());
 
       for (Long block_id: block_ids) {
-        crm.addToCorruptReplicasMap(getBlock(block_id), dn1, "TEST");
+        addToCorruptReplicasMap(crm, getBlock(block_id), dn1);
       }
 
       assertEquals("Number of corrupt blocks not returning correctly",
@@ -127,4 +128,9 @@ public class TestCorruptReplicaInfo {
                                            crm.getCorruptReplicaBlockIds(10, 7L)));
 
   }
+
+  private static void addToCorruptReplicasMap(CorruptReplicasMap crm,
+      Block blk, DatanodeDescriptor dn) {
+    crm.addToCorruptReplicasMap(blk, dn, "TEST", Reason.NONE);
+  }
 }

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java Thu Aug 7 07:38:23 2014
@@ -63,16 +63,16 @@ public class TestDatanodeDescriptor {
     assertTrue(storages.length > 0);
     final String storageID = storages[0].getStorageID();
     // add first block
-    assertTrue(dd.addBlock(storageID, blk));
+    assertTrue(storages[0].addBlock(blk));
     assertEquals(1, dd.numBlocks());
     // remove a non-existent block
     assertFalse(dd.removeBlock(blk1));
     assertEquals(1, dd.numBlocks());
     // add an existent block
-    assertFalse(dd.addBlock(storageID, blk));
+    assertFalse(storages[0].addBlock(blk));
     assertEquals(1, dd.numBlocks());
     // add second block
-    assertTrue(dd.addBlock(storageID, blk1));
+    assertTrue(storages[0].addBlock(blk1));
     assertEquals(2, dd.numBlocks());
     // remove first block
     assertTrue(dd.removeBlock(blk));

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java Thu Aug 7 07:38:23 2014
@@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.DFSTestUti
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.blockmanagement.PendingDataNodeMessages.ReportedBlockInfo;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.junit.Test;
 
 import com.google.common.base.Joiner;
@@ -43,8 +44,10 @@ public class TestPendingDataNodeMessages
   @Test
   public void testQueues() {
     DatanodeDescriptor fakeDN = DFSTestUtil.getLocalDatanodeDescriptor();
-    msgs.enqueueReportedBlock(fakeDN, "STORAGE_ID", block1Gs1, ReplicaState.FINALIZED);
-    msgs.enqueueReportedBlock(fakeDN, "STORAGE_ID", block1Gs2, ReplicaState.FINALIZED);
+    DatanodeStorage storage = new DatanodeStorage("STORAGE_ID");
+    DatanodeStorageInfo storageInfo = new DatanodeStorageInfo(fakeDN, storage);
+    msgs.enqueueReportedBlock(storageInfo, block1Gs1, ReplicaState.FINALIZED);
+    msgs.enqueueReportedBlock(storageInfo, block1Gs2, ReplicaState.FINALIZED);
 
     assertEquals(2, msgs.count());

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java Thu Aug 7 07:38:23 2014
@@ -83,7 +83,7 @@ public class TestReplicationPolicy {
   private static NameNode namenode;
   private static BlockPlacementPolicy replicator;
   private static final String filename = "/dummyfile.txt";
-  private static DatanodeDescriptor dataNodes[];
+  private static DatanodeDescriptor[] dataNodes;
   private static DatanodeStorageInfo[] storages;
   // The interval for marking a datanode as stale,
   private static final long staleInterval =
@@ -1122,8 +1122,7 @@ public class TestReplicationPolicy {
     // Adding this block will increase its current replication, and that will
     // remove it from the queue.
     bm.addStoredBlockUnderConstruction(new StatefulBlockInfo(info, info,
-        ReplicaState.FINALIZED), TestReplicationPolicy.dataNodes[0],
-        "STORAGE");
+        ReplicaState.FINALIZED), TestReplicationPolicy.storages[0]);
 
     // Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
     // from QUEUE_VERY_UNDER_REPLICATED.

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java Thu Aug 7 07:38:23 2014
@@ -25,6 +25,7 @@ import java.io.FilenameFilter;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.Socket;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -384,7 +385,7 @@ public class TestDataNodeVolumeFailure {
           continue;
         }
 
-        String [] res = metaFilesInDir(dir);
+        List<File> res = MiniDFSCluster.getAllBlockMetadataFiles(dir);
         if(res == null) {
           System.out.println("res is null for dir = " + dir + " i=" + i + " and j=" + j);
           continue;
@@ -392,7 +393,8 @@ public class TestDataNodeVolumeFailure {
       //System.out.println("for dn" + i + "." + j + ": " + dir + "=" + res.length+ " files");
 
       //int ii = 0;
-        for(String s: res) {
+        for(File f: res) {
+          String s = f.getName();
           // cut off "blk_-" at the beginning and ".meta" at the end
           assertNotNull("Block file name should not be null", s);
           String bid = s.substring(s.indexOf("_")+1, s.lastIndexOf("_"));
@@ -408,25 +410,9 @@ public class TestDataNodeVolumeFailure {
       //System.out.println("dir1="+dir.getPath() + "blocks=" + res.length);
       //System.out.println("dir2="+dir2.getPath() + "blocks=" + res2.length);
 
-        total += res.length;
+        total += res.size();
       }
     }
     return total;
   }
-
-  /*
-   * count how many files *.meta are in the dir
-   */
-  private String [] metaFilesInDir(File dir) {
-    String [] res = dir.list(
-        new FilenameFilter() {
-          @Override
-          public boolean accept(File dir, String name) {
-            return name.startsWith("blk_") &&
-              name.endsWith(Block.METADATA_EXTENSION);
-          }
-        }
-    );
-    return res;
-  }
 }

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java Thu Aug 7 07:38:23 2014
@@ -103,9 +103,10 @@ public class TestDeleteBlockPool {
       fs1.delete(new Path("/alpha"), true);
 
       // Wait till all blocks are deleted from the dn2 for bpid1.
-      while ((MiniDFSCluster.getFinalizedDir(dn2StorageDir1,
-          bpid1).list().length != 0) || (MiniDFSCluster.getFinalizedDir(
-          dn2StorageDir2, bpid1).list().length != 0)) {
+      File finalDir1 = MiniDFSCluster.getFinalizedDir(dn2StorageDir1, bpid1);
+      File finalDir2 = MiniDFSCluster.getFinalizedDir(dn2StorageDir1, bpid2);
+      while ((!DatanodeUtil.dirNoFilesRecursive(finalDir1)) ||
+          (!DatanodeUtil.dirNoFilesRecursive(finalDir2))) {
         try {
           Thread.sleep(3000);
         } catch (Exception ignored) {
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java Thu Aug 7 07:38:23 2014
@@ -201,7 +201,7 @@ public class TestDiskError {
   }
 
   /**
-   * Checks whether {@link DataNode#checkDiskError()} is being called or not.
+   * Checks whether {@link DataNode#checkDiskErrorAsync()} is being called or not.
    * Before refactoring the code the above function was not getting called
    * @throws IOException, InterruptedException
    */
@@ -214,7 +214,7 @@ public class TestDiskError {
     DataNode dataNode = cluster.getDataNodes().get(0);
     long slackTime = dataNode.checkDiskErrorInterval/2;
     //checking for disk error
-    dataNode.checkDiskError();
+    dataNode.checkDiskErrorAsync();
     Thread.sleep(dataNode.checkDiskErrorInterval);
     long lastDiskErrorCheck = dataNode.getLastDiskErrorCheck();
     assertTrue("Disk Error check is not performed within "  + dataNode.checkDiskErrorInterval +  " ms", ((Time.monotonicNow()-lastDiskErrorCheck) < (dataNode.checkDiskErrorInterval + slackTime)));

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java Thu Aug 7 07:38:23 2014
@@ -31,19 +31,24 @@ import java.util.Random;
 
 import org.apache.commons.io.output.ByteArrayOutputStream;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
@@ -89,6 +94,8 @@ public class TestDecommissioningStatus {
         4);
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 1);
+    conf.setLong(DFSConfigKeys.DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY, 1);
+
     writeConfigFile(localFileSys, excludeFile, null);
     writeConfigFile(localFileSys, includeFile, null);
 
@@ -99,6 +106,7 @@ public class TestDecommissioningStatus {
 
   @AfterClass
   public static void tearDown() throws Exception {
+    if (localFileSys != null ) cleanupFile(localFileSys, dir);
     if(fileSys != null) fileSys.close();
     if(cluster != null) cluster.shutdown();
   }
@@ -138,7 +146,8 @@ public class TestDecommissioningStatus {
     return stm;
   }
 
-  private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
+  static private void cleanupFile(FileSystem fileSys, Path name)
+      throws IOException {
     assertTrue(fileSys.exists(name));
     fileSys.delete(name, true);
     assertTrue(!fileSys.exists(name));
@@ -147,19 +156,26 @@ public class TestDecommissioningStatus {
   /*
    * Decommissions the node at the given index
    */
-  private String decommissionNode(FSNamesystem namesystem,
-      DFSClient client, FileSystem localFileSys, int nodeIndex)
-      throws IOException {
+  private String decommissionNode(FSNamesystem namesystem, DFSClient client,
+      FileSystem localFileSys, int nodeIndex) throws IOException {
     DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
 
     String nodename = info[nodeIndex].getXferAddr();
-    System.out.println("Decommissioning node: " + nodename);
+    decommissionNode(namesystem, localFileSys, nodename);
+    return nodename;
+  }
+
+  /*
+   * Decommissions the node by name
+   */
+  private void decommissionNode(FSNamesystem namesystem,
+      FileSystem localFileSys, String dnName) throws IOException {
+    System.out.println("Decommissioning node: " + dnName);
 
     // write nodename into the exclude file.
     ArrayList<String> nodes = new ArrayList<String>(decommissionedNodes);
-    nodes.add(nodename);
+    nodes.add(dnName);
     writeConfigFile(localFileSys, excludeFile, nodes);
-    return nodename;
   }
 
   private void checkDecommissionStatus(DatanodeDescriptor decommNode,
@@ -276,6 +292,69 @@ public class TestDecommissioningStatus {
     st1.close();
     cleanupFile(fileSys, file1);
     cleanupFile(fileSys, file2);
-    cleanupFile(localFileSys, dir);
+  }
+
+  /**
+   * Verify that a DN remains in DECOMMISSION_INPROGRESS state if it is marked
+   * as dead before decommission has completed. That will allow the DN to
+   * resume the replication process after it rejoins the cluster.
+   */
+  @Test(timeout=120000)
+  public void testDecommissionStatusAfterDNRestart()
+      throws IOException, InterruptedException {
+    DistributedFileSystem fileSys =
+        (DistributedFileSystem)cluster.getFileSystem();
+
+    // Create a file with one block. That block has one replica.
+    Path f = new Path("decommission.dat");
+    DFSTestUtil.createFile(fileSys, f, fileSize, fileSize, fileSize,
+        (short)1, seed);
+
+    // Find the DN that owns the only replica.
+    RemoteIterator<LocatedFileStatus> fileList = fileSys.listLocatedStatus(f);
+    BlockLocation[] blockLocations = fileList.next().getBlockLocations();
+    String dnName = blockLocations[0].getNames()[0];
+
+    // Decommission the DN.
+    FSNamesystem fsn = cluster.getNamesystem();
+    final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
+    decommissionNode(fsn, localFileSys, dnName);
+    dm.refreshNodes(conf);
+
+    // Stop the DN while decommission is in progress.
+    // Given that DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is set to 1 and the
+    // size of the block, decommission will take much longer than the test
+    // timeout to complete. So when stopDataNode is called, decommission
+    // should still be in progress.
+    DataNodeProperties dataNodeProperties = cluster.stopDataNode(dnName);
+    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
+    while (true) {
+      dm.fetchDatanodes(null, dead, false);
+      if (dead.size() == 1) {
+        break;
+      }
+      Thread.sleep(1000);
+    }
+
+    // Force removal of the dead node's blocks.
+    BlockManagerTestUtil.checkHeartbeat(fsn.getBlockManager());
+
+    // Force DatanodeManager to check decommission state.
+    BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
+
+    // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
+    assertTrue("the node is in decommissioned state ",
+        !dead.get(0).isDecommissioned());
+
+    // Add the node back.
+    cluster.restartDataNode(dataNodeProperties, true);
+    cluster.waitActive();
+
+    // Call refreshNodes on FSNamesystem with an empty exclude file.
+    // This will remove the datanodes from the decommissioning list and
+    // make them available again.
+    writeConfigFile(localFileSys, excludeFile, null);
+    dm.refreshNodes(conf);
+    cleanupFile(fileSys, f);
   }
 }
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Thu Aug 7 07:38:23 2014
@@ -41,6 +41,7 @@ import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
@@ -63,6 +64,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -750,15 +752,14 @@ public class TestFsck {
       for (int j=0; j<=1; j++) {
         File storageDir = cluster.getInstanceStorageDir(i, j);
         File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
-        File[] blocks = data_dir.listFiles();
-        if (blocks == null)
+        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
+            data_dir);
+        if (metadataFiles == null)
           continue;
-
-        for (int idx = 0; idx < blocks.length; idx++) {
-          if (!blocks[idx].getName().startsWith("blk_")) {
-            continue;
-          }
-          assertTrue("Cannot remove file.", blocks[idx].delete());
+        for (File metadataFile : metadataFiles) {
+          File blockFile = Block.metaToBlockFile(metadataFile);
+          assertTrue("Cannot remove file.", blockFile.delete());
+          assertTrue("Cannot remove file.", metadataFile.delete());
         }
       }
     }

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java Thu Aug 7 07:38:23 2014
@@ -25,6 +25,7 @@ import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
 import java.util.Collection;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.commons.logging.Log;
@@ -39,7 +40,11 @@ import org.apache.hadoop.hdfs.DFSTestUti
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.TestFileCorruption;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.datanode.DatanodeUtil;
 import org.apache.hadoop.util.StringUtils;
 import org.junit.Test;
 
@@ -87,36 +92,29 @@ public class TestListCorruptFileBlocks {
       File storageDir = cluster.getInstanceStorageDir(0, 1);
       File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
       assertTrue("data directory does not exist", data_dir.exists());
-      File[] blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (blocks[idx].getName().startsWith("blk_") &&
-            blocks[idx].getName().endsWith(".meta")) {
-          //
-          // shorten .meta file
-          //
-          RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
-          FileChannel channel = file.getChannel();
-          long position = channel.size() - 2;
-          int length = 2;
-          byte[] buffer = new byte[length];
-          random.nextBytes(buffer);
-          channel.write(ByteBuffer.wrap(buffer), position);
-          file.close();
-          LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
-              " at offset " + position + " length " + length);
-
-          // read all files to trigger detection of corrupted replica
-          try {
-            util.checkFiles(fs, "/srcdat10");
-          } catch (BlockMissingException e) {
-            System.out.println("Received BlockMissingException as expected.");
-          } catch (IOException e) {
-            assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
-                " but received IOException " + e, false);
-          }
-          break;
-        }
+      List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
+      assertTrue("Data directory does not contain any blocks or there was an "
+          + "IO error", metaFiles != null && !metaFiles.isEmpty());
+      File metaFile = metaFiles.get(0);
+      RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
+      FileChannel channel = file.getChannel();
+      long position = channel.size() - 2;
+      int length = 2;
+      byte[] buffer = new byte[length];
+      random.nextBytes(buffer);
+      channel.write(ByteBuffer.wrap(buffer), position);
+      file.close();
+      LOG.info("Deliberately corrupting file " + metaFile.getName() +
+          " at offset " + position + " length " + length);
+
+      // read all files to trigger detection of corrupted replica
+      try {
+        util.checkFiles(fs, "/srcdat10");
+      } catch (BlockMissingException e) {
+        System.out.println("Received BlockMissingException as expected.");
+      } catch (IOException e) {
+        assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
+            " but received IOException " + e, false);
       }
 
       // fetch bad file list from namenode. There should be one file.
@@ -174,38 +172,30 @@ public class TestListCorruptFileBlocks {
       File data_dir = MiniDFSCluster.getFinalizedDir(storageDir,
           cluster.getNamesystem().getBlockPoolId());
       assertTrue("data directory does not exist", data_dir.exists());
-      File[] blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
-          (blocks.length > 0));
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (blocks[idx].getName().startsWith("blk_") &&
-            blocks[idx].getName().endsWith(".meta")) {
-          //
-          // shorten .meta file
-          //
-          RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
-          FileChannel channel = file.getChannel();
-          long position = channel.size() - 2;
-          int length = 2;
-          byte[] buffer = new byte[length];
-          random.nextBytes(buffer);
-          channel.write(ByteBuffer.wrap(buffer), position);
-          file.close();
-          LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
-              " at offset " + position + " length " + length);
-
-          // read all files to trigger detection of corrupted replica
-          try {
-            util.checkFiles(fs, "/srcdat10");
-          } catch (BlockMissingException e) {
-            System.out.println("Received BlockMissingException as expected.");
-          } catch (IOException e) {
-            assertTrue("Corrupted replicas not handled properly. " +
-                "Expecting BlockMissingException " +
-                " but received IOException " + e, false);
-          }
-          break;
-        }
+      List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
+      assertTrue("Data directory does not contain any blocks or there was an "
+          + "IO error", metaFiles != null && !metaFiles.isEmpty());
+      File metaFile = metaFiles.get(0);
+      RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
+      FileChannel channel = file.getChannel();
+      long position = channel.size() - 2;
+      int length = 2;
+      byte[] buffer = new byte[length];
+      random.nextBytes(buffer);
+      channel.write(ByteBuffer.wrap(buffer), position);
+      file.close();
+      LOG.info("Deliberately corrupting file " + metaFile.getName() +
+          " at offset " + position + " length " + length);
+
+      // read all files to trigger detection of corrupted replica
+      try {
+        util.checkFiles(fs, "/srcdat10");
+      } catch (BlockMissingException e) {
+        System.out.println("Received BlockMissingException as expected.");
+      } catch (IOException e) {
+        assertTrue("Corrupted replicas not handled properly. " +
+            "Expecting BlockMissingException " +
+            " but received IOException " + e, false);
       }
 
       // fetch bad file list from namenode. There should be one file.
@@ -295,17 +285,18 @@ public class TestListCorruptFileBlocks {
         for (int j = 0; j <= 1; j++) {
           File storageDir = cluster.getInstanceStorageDir(i, j);
           File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
-          File[] blocks = data_dir.listFiles();
-          if (blocks == null)
+          List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
+              data_dir);
+          if (metadataFiles == null)
             continue;
           // assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
           //     (blocks.length > 0));
-          for (int idx = 0; idx < blocks.length; idx++) {
-            if (!blocks[idx].getName().startsWith("blk_")) {
-              continue;
-            }
-            LOG.info("Deliberately removing file " + blocks[idx].getName());
-            assertTrue("Cannot remove file.", blocks[idx].delete());
+          for (File metadataFile : metadataFiles) {
+            File blockFile = Block.metaToBlockFile(metadataFile);
+            LOG.info("Deliberately removing file " + blockFile.getName());
+            assertTrue("Cannot remove file.", blockFile.delete());
+            LOG.info("Deliberately removing file " + metadataFile.getName());
+            assertTrue("Cannot remove file.", metadataFile.delete());
             // break;
           }
         }
@@ -405,17 +396,18 @@ public class TestListCorruptFileBlocks {
       for (int i = 0; i < 2; i++) {
         File storageDir = cluster.getInstanceStorageDir(0, i);
         File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
-        File[] blocks = data_dir.listFiles();
-        if (blocks == null)
+        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
+            data_dir);
+        if (metadataFiles == null)
          continue;
         // assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
         //     (blocks.length > 0));
-        for (int idx = 0; idx < blocks.length; idx++) {
-          if (!blocks[idx].getName().startsWith("blk_")) {
-            continue;
-          }
-          LOG.info("Deliberately removing file " + blocks[idx].getName());
-          assertTrue("Cannot remove file.", blocks[idx].delete());
+        for (File metadataFile : metadataFiles) {
+          File blockFile = Block.metaToBlockFile(metadataFile);
+          LOG.info("Deliberately removing file " + blockFile.getName());
+          assertTrue("Cannot remove file.", blockFile.delete());
+          LOG.info("Deliberately removing file " + metadataFile.getName());
+          assertTrue("Cannot remove file.", metadataFile.delete());
           // break;
         }
       }
@@ -482,15 +474,14 @@ public class TestListCorruptFileBlocks {
           File storageDir = cluster.getInstanceStorageDir(i, j);
           File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
           LOG.info("Removing files from " + data_dir);
-          File[] blocks = data_dir.listFiles();
-          if (blocks == null)
+          List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
+              data_dir);
+          if (metadataFiles == null)
            continue;
-
-          for (int idx = 0; idx < blocks.length; idx++) {
-            if (!blocks[idx].getName().startsWith("blk_")) {
-              continue;
-            }
-            assertTrue("Cannot remove file.", blocks[idx].delete());
+          for (File metadataFile : metadataFiles) {
+            File blockFile = Block.metaToBlockFile(metadataFile);
+            assertTrue("Cannot remove file.", blockFile.delete());
+            assertTrue("Cannot remove file.", metadataFile.delete());
           }
         }
       }
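The block-file/meta-file removal loop appears in several hunks above (TestFsck and three places in TestListCorruptFileBlocks). If it were ever consolidated, a helper along the following lines would cover it; the class and method names are illustrative and not part of this change.

import java.io.File;
import java.util.List;

import org.apache.hadoop.hdfs.protocol.Block;

import static org.junit.Assert.assertTrue;

public class ReplicaRemovalSketch {
  // Delete each replica found under a finalized dir: the block file plus its
  // .meta file. Block.metaToBlockFile maps "blk_<id>_<genstamp>.meta" back to
  // the corresponding "blk_<id>" file, as in the hunks above.
  public static void deleteReplicas(List<File> metadataFiles) {
    if (metadataFiles == null) {
      return;   // mirrors the tests' "continue" when the directory is missing
    }
    for (File metadataFile : metadataFiles) {
      File blockFile = Block.metaToBlockFile(metadataFile);
      assertTrue("Cannot remove file.", blockFile.delete());
      assertTrue("Cannot remove file.", metadataFile.delete());
    }
  }
}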
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java?rev=1616428&r1=1616427&r2=1616428&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java Thu Aug 7 07:38:23 2014
@@ -50,6 +50,7 @@ import org.apache.hadoop.fs.XAttrSetFlag
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSOutputStream;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -725,7 +726,12 @@ public class TestRetryCacheWithHA {
       client.getNamenode().updatePipeline(client.getClientName(), oldBlock,
           newBlock, newNodes, storageIDs);
-      out.close();
+      // close() can fail if out.close() commits the block after the
+      // block-received notifications from the DataNode.
+      // Since the datanodes and the output stream still have the old
+      // genstamps, these blocks will be marked as corrupt after HDFS-5723 if
+      // the RECEIVED notifications reach the namenode first; close() will fail.
+      DFSTestUtil.abortStream((DFSOutputStream) out.getWrappedStream());
     }
 
     @Override