shahrs87 commented on code in PR #6614:
URL: https://github.com/apache/hadoop/pull/6614#discussion_r1527571968
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java:
##########

@@ -1707,4 +1708,154 @@ public void testStorageFavouredNodes()
       assertEquals("Number of SSD should be 1 but was : " + numSSD, 1, numSSD);
     }
   }
+
+  @Test
+  public void testSingleRackFailureDuringPipelineSetupMinReplicationPossible() throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(
+        DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+        MIN_REPLICATION, 2);
+    // 3 racks & 6 nodes. 1 per rack for 2 racks and 3 nodes in the 3rd rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill all the DNs in the 3rd rack.
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+
+      // create a file with replication 3, for rack fault tolerant BPP,
+      DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
+    }
+  }
+
+  @Test
+  public void testSingleRackFailureDuringPipelineSetupMinReplicationImpossible()
+      throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class, BlockPlacementPolicy.class);
+    conf.setBoolean(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY, false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.MIN_REPLICATION, 3);
+    // 3 racks & 6 nodes. 1 per rack for 2 racks and 3 nodes in the 3rd rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill one DN, so only 2 racks stays with active DN
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+      boolean threw = false;
+      try {
+        DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
+      } catch (IOException e) {
+        threw = true;
+      }
+      assertTrue(threw);
+    }
+  }
+
+  @Test
+  public void testMultipleRackFailureDuringPipelineSetupMinReplicationPossible() throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(
+        DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+        MIN_REPLICATION, 1);
+    // 3 racks & 3 nodes. 1 per rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill one DN, so only 2 racks stays with active DN
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+      cluster.stopDataNode(1);
+      // create a file with replication 3, for rack fault tolerant BPP,
+      // it should allocate nodes in all 3 racks.
+      DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
+    }
+  }
+
+  @Test
+  public void testMultipleRackFailureDuringPipelineSetupMinReplicationImpossible()
+      throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+        MIN_REPLICATION, 2);
+    // 3 racks & 3 nodes. 1 per rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill one DN, so only 2 racks stays with active DN
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+      cluster.stopDataNode(1);
+      boolean threw = false;
+      try {
+        DFSTestUtil.createFile(fs, new Path("/testFile"),
+            1024L, (short) 3, 1024L);
+      } catch (IOException e) {
+        threw = true;
+      }
+      assertTrue(threw);
+    }
+  }
+
+  @Test
+  public void testAllRackFailureDuringPipelineSetup() throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(
+        DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    // 3 racks & 3 nodes. 1 per rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // shutdown all DNs
+      cluster.shutdownDataNodes();
+      // create a file with replication 3, for rack fault tolerant BPP,
+      // it should allocate nodes in all 3 rack but fail because no DNs are present.
+      boolean threw = false;
+      try {
+        DFSTestUtil.createFile(fs, new Path("/testFile"),
+            1024L, (short) 3, 1024L);
+      } catch (IOException e) {
+        threw = true;
+      }
+      assertTrue(threw);
+    }
+  }
+

Each review comment below quotes the line it refers to.

##########
testMultipleRackFailureDuringPipelineSetupMinReplicationPossible:
+    // 3 racks & 3 nodes. 1 per rack
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg
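For reference, a minimal sketch of what a corrected topology comment could look like, given the builder call shared by all five tests (one DN on /rack1, one on /rack2, four on /rack3). The wording is only a suggestion, not code from the PR:

```java
// 3 racks & 6 nodes: 1 node each on /rack1 and /rack2, 4 nodes on /rack3.
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
    .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"})
    .build();
```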
##########
testMultipleRackFailureDuringPipelineSetupMinReplicationImpossible:
+      // kill one DN, so only 2 racks stays with active DN
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg
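Assuming DN indices follow the racks array passed to the builder (index 0 = /rack1, 1 = /rack2, 2 through 5 = /rack3), a sketch of comments that would match what this test actually does. The note about descending order is an inference: MiniDFSCluster.stopDataNode(int) removes the node from its internal list, so stopping from the highest index keeps the remaining indices stable:

```java
// Stop all four DNs on /rack3 (indices 2-5) and the DN on /rack2 (index 1),
// leaving only the single DN on /rack1 alive. Stopping in descending index
// order avoids re-indexing of the datanode list between calls.
cluster.stopDataNode(5);
cluster.stopDataNode(4);
cluster.stopDataNode(3);
cluster.stopDataNode(2);
cluster.stopDataNode(1);
```

With min-replication set to 2 and only one rack left alive, the pipeline cannot reach two replicas, so the createFile call is expected to fail.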
##########
testSingleRackFailureDuringPipelineSetupMinReplicationImpossible:
+      // kill one DN, so only 2 racks stays with active DN
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg

##########
testSingleRackFailureDuringPipelineSetupMinReplicationImpossible:
+    // 3 racks & 6 nodes. 1 per rack for 2 racks and 3 nodes in the 3rd rack
##########

Review Comment:
   `4 nodes in 3rd rack.`
##########
testAllRackFailureDuringPipelineSetup:
+    // 3 racks & 3 nodes. 1 per rack
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg
##########
testMultipleRackFailureDuringPipelineSetupMinReplicationPossible:
+      // create a file with replication 3, for rack fault tolerant BPP,
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg
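If the comment is reworded, a hypothetical follow-up assertion (not in the PR) could also pin down what the successful write actually produced; FileStatus and BlockLocation are standard org.apache.hadoop.fs types:

```java
// The write succeeds with min-replication 1 even though only the /rack1 DN
// is alive; the requested replication factor is still recorded as 3.
FileStatus st = fs.getFileStatus(new Path("/testFile"));
assertEquals(3, st.getReplication());
BlockLocation[] locs = fs.getFileBlockLocations(st, 0, st.getLen());
assertEquals(1, locs[0].getHosts().length); // only one live replica for now
```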
##########
TestDistributedFileSystem.java, trailing blank line after testAllRackFailureDuringPipelineSetup:
+
##########

Review Comment:
   nit: extra line

##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java:
##########

@@ -3526,3 +3547,5 @@ void stopAllDataxceiverThreads(FsVolumeImpl volume) {
     }
   }
 }
+

Review Comment:
   nit: extra lines.
##########
testMultipleRackFailureDuringPipelineSetupMinReplicationImpossible:
+    // 3 racks & 3 nodes. 1 per rack
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg

##########
testMultipleRackFailureDuringPipelineSetupMinReplicationPossible:
+      // kill one DN, so only 2 racks stays with active DN
##########

Review Comment:
   The comments are not correct. Can you please update them?

@ritegarg
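On the repeated `boolean threw` pattern in the Impossible tests: a sketch of a tighter alternative using LambdaTestUtils from hadoop-common's test utilities (assuming it is on this module's test classpath), which also fails with a useful message when no exception is thrown:

```java
import org.apache.hadoop.test.LambdaTestUtils;

// Expect pipeline setup to fail: min-replication cannot be satisfied
// by the datanodes that are still alive.
LambdaTestUtils.intercept(IOException.class,
    () -> DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L));
```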
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go
to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org