[ https://issues.apache.org/jira/browse/HDFS-17299?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17827805#comment-17827805 ]
ASF GitHub Bot commented on HDFS-17299:
---------------------------------------

shahrs87 commented on code in PR #6614:
URL: https://github.com/apache/hadoop/pull/6614#discussion_r1527571968


##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java:
##########
@@ -1707,4 +1708,154 @@ public void testStorageFavouredNodes()
       assertEquals("Number of SSD should be 1 but was : " + numSSD, 1, numSSD);
     }
   }
+
+  @Test
+  public void testSingleRackFailureDuringPipelineSetupMinReplicationPossible() throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(
+        DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+        MIN_REPLICATION, 2);
+    // 3 racks & 6 nodes. 1 per rack for 2 racks and 3 nodes in the 3rd rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill all the DNs in the 3rd rack.
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+
+      // create a file with replication 3, for rack fault tolerant BPP,
+      DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
+    }
+  }
+
+  @Test
+  public void testSingleRackFailureDuringPipelineSetupMinReplicationImpossible()
+      throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class, BlockPlacementPolicy.class);
+    conf.setBoolean(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY, false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.MIN_REPLICATION, 3);
+    // 3 racks & 6 nodes. 1 per rack for 2 racks and 3 nodes in the 3rd rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill one DN, so only 2 racks stays with active DN
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+      boolean threw = false;
+      try {
+        DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
+      } catch (IOException e) {
+        threw = true;
+      }
+      assertTrue(threw);
+    }
+  }
+
+  @Test
+  public void testMultipleRackFailureDuringPipelineSetupMinReplicationPossible() throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(
+        DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+        MIN_REPLICATION, 1);
+    // 3 racks & 3 nodes. 1 per rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill one DN, so only 2 racks stays with active DN
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+      cluster.stopDataNode(1);
+      // create a file with replication 3, for rack fault tolerant BPP,
+      // it should allocate nodes in all 3 racks.
+      DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
+    }
+  }
+
+  @Test
+  public void testMultipleRackFailureDuringPipelineSetupMinReplicationImpossible()
+      throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    conf.setInt(HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.
+        MIN_REPLICATION, 2);
+    // 3 racks & 3 nodes. 1 per rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // kill one DN, so only 2 racks stays with active DN
+      cluster.stopDataNode(5);
+      cluster.stopDataNode(4);
+      cluster.stopDataNode(3);
+      cluster.stopDataNode(2);
+      cluster.stopDataNode(1);
+      boolean threw = false;
+      try {
+        DFSTestUtil.createFile(fs, new Path("/testFile"),
+            1024L, (short) 3, 1024L);
+      } catch (IOException e) {
+        threw = true;
+      }
+      assertTrue(threw);
+    }
+  }
+
+  @Test
+  public void testAllRackFailureDuringPipelineSetup() throws Exception {
+    Configuration conf = getTestConfiguration();
+    conf.setClass(
+        DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY,
+        BlockPlacementPolicyRackFaultTolerant.class,
+        BlockPlacementPolicy.class);
+    conf.setBoolean(
+        HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.ENABLE_KEY,
+        false);
+    // 3 racks & 3 nodes. 1 per rack
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
+        .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
+      cluster.waitClusterUp();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      // shutdown all DNs
+      cluster.shutdownDataNodes();
+      // create a file with replication 3, for rack fault tolerant BPP,
+      // it should allocate nodes in all 3 rack but fail because no DNs are present.
+      boolean threw = false;
+      try {
+        DFSTestUtil.createFile(fs, new Path("/testFile"),
+            1024L, (short) 3, 1024L);
+      } catch (IOException e) {
+        threw = true;
+      }
+      assertTrue(threw);
+    }
+  }
+

The review comments below are all on this hunk, each anchored on the quoted line.

Review Comment (repeated on each of the following lines):
   The comments are not correct. Can you please update them? @ritegarg

   * `// 3 racks & 3 nodes. 1 per rack` in
     testMultipleRackFailureDuringPipelineSetupMinReplicationPossible
   * `// kill one DN, so only 2 racks stays with active DN` in
     testMultipleRackFailureDuringPipelineSetupMinReplicationImpossible
   * `// kill one DN, so only 2 racks stays with active DN` in
     testSingleRackFailureDuringPipelineSetupMinReplicationImpossible
   * `// 3 racks & 3 nodes. 1 per rack` in
     testAllRackFailureDuringPipelineSetup
   * `// create a file with replication 3, for rack fault tolerant BPP,` in
     testMultipleRackFailureDuringPipelineSetupMinReplicationPossible
   * `// 3 racks & 3 nodes. 1 per rack` in
     testMultipleRackFailureDuringPipelineSetupMinReplicationImpossible
   * `// kill one DN, so only 2 racks stays with active DN` in
     testMultipleRackFailureDuringPipelineSetupMinReplicationPossible

Review Comment (on `// 3 racks & 6 nodes. 1 per rack for 2 racks and 3 nodes in the 3rd rack`
in testSingleRackFailureDuringPipelineSetupMinReplicationImpossible):
   `4 nodes in 3rd rack.`

Review Comment (on the trailing blank line after testAllRackFailureDuringPipelineSetup):
   nit: extra line


##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java:
##########
@@ -3526,3 +3547,5 @@ void stopAllDataxceiverThreads(FsVolumeImpl volume) {
     }
   }
 }
+

Review Comment:
   nit: extra lines.
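Coming back to the repeated "comments are not correct" remarks: all five new tests build the same 6-node topology, so the requested fix presumably amounts to making each comment match the actual rack and node counts. One possible corrected version of the multiple-rack-failure setup is sketched below; this is illustrative only, not text from the PR:

{code:java}
// 3 racks & 6 nodes: 1 node each in /rack1 and /rack2, 4 nodes in /rack3.
try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(6)
    .racks(new String[] {"/rack1", "/rack2", "/rack3", "/rack3", "/rack3", "/rack3"}).build()) {
  cluster.waitClusterUp();
  DistributedFileSystem fs = cluster.getFileSystem();
  // Stop five DNs: all four in /rack3 plus the one in /rack2,
  // leaving only the single DN in /rack1 alive.
  cluster.stopDataNode(5);
  cluster.stopDataNode(4);
  cluster.stopDataNode(3);
  cluster.stopDataNode(2);
  cluster.stopDataNode(1);
  // With MIN_REPLICATION relaxed to 1, a file with replication 3 can still
  // be created on the lone surviving DN; the missing replicas are restored
  // once the other racks come back.
  DFSTestUtil.createFile(fs, new Path("/testFile"), 1024L, (short) 3, 1024L);
}
{code}

The descending stop order appears deliberate: MiniDFSCluster#stopDataNode(int) removes the entry from the cluster's datanode list, so stopping 5, 4, 3, 2, 1 keeps each index pointing at the originally numbered datanode.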
> HDFS is not rack failure tolerant while creating a new file.
> -------------------------------------------------------------
>
>                 Key: HDFS-17299
>                 URL: https://issues.apache.org/jira/browse/HDFS-17299
>             Project: Hadoop HDFS
>          Issue Type: Bug
>    Affects Versions: 2.10.1
>            Reporter: Rushabh Shah
>            Assignee: Ritesh
>            Priority: Critical
>              Labels: pull-request-available
>             Fix For: 3.3.9, 3.4.1, 3.5.0
>
>         Attachments: repro.patch
>
>
> Recently we saw an HBase cluster outage when we mistakenly brought down 1 AZ.
>
> Our configuration:
> 1. We use 3 Availability Zones (AZs) for fault tolerance.
> 2. We use BlockPlacementPolicyRackFaultTolerant as the block placement policy.
> 3. We use the following configuration parameters:
>    dfs.namenode.heartbeat.recheck-interval: 600000
>    dfs.heartbeat.interval: 3
>    So it will take 1230000 ms (20.5 mins) to detect that a datanode is dead:
>    2 * dfs.namenode.heartbeat.recheck-interval + 10 * dfs.heartbeat.interval
>    = 2 * 600000 ms + 10 * 3000 ms = 1230000 ms.
>
> Steps to reproduce:
> # Bring down 1 AZ.
> # HBase (HDFS client) tries to create a file (WAL file) and then calls hflush on the newly created file.
> # DataStreamer is not able to find block locations that satisfy the rack placement policy (one copy in each rack, which essentially means one copy in each AZ).
> # Since all the datanodes in that AZ are down but still appear alive to the namenode, the client keeps getting different datanodes, yet all of them are in the same AZ. See logs below.
> # HBase is not able to create a WAL file and it aborts the region server.
>
> Relevant logs from the hdfs client and namenode:
>
> {noformat}
> 2023-12-16 17:17:43,818 INFO [on default port 9000] FSNamesystem.audit - allowed=true ugi=hbase/<rs-name> (auth:KERBEROS) ip=<rs-IP> cmd=create src=/hbase/WALs/<WAL-file> dst=null
> 2023-12-16 17:17:43,978 INFO [on default port 9000] hdfs.StateChange - BLOCK* allocate blk_1214652565_140946716, replicas=<AZ-1-dn-1>:50010, <AZ-2-dn-1>:50010, <AZ-3-dn-1>:50010 for /hbase/WALs/<WAL-file>
> 2023-12-16 17:17:44,061 INFO [Thread-39087] hdfs.DataStreamer - Exception in createBlockOutputStream
> java.io.IOException: Got error, status=ERROR, status message , ack with firstBadLink as <AZ-2-dn-1>:50010
>     at org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.checkBlockOpStatus(DataTransferProtoUtil.java:113)
>     at org.apache.hadoop.hdfs.DataStreamer.createBlockOutputStream(DataStreamer.java:1747)
>     at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1651)
>     at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:715)
> 2023-12-16 17:17:44,061 WARN [Thread-39087] hdfs.DataStreamer - Abandoning BP-179318874-<NN-IP>-1594838129323:blk_1214652565_140946716
> 2023-12-16 17:17:44,179 WARN [Thread-39087] hdfs.DataStreamer - Excluding datanode DatanodeInfoWithStorage[<AZ-2-dn-1>:50010,DS-a493abdb-3ac3-49b1-9bfb-848baf5c1c2c,DISK]
> 2023-12-16 17:17:44,339 INFO [on default port 9000] hdfs.StateChange - BLOCK* allocate blk_1214652580_140946764, replicas=<AZ-1-dn-2>:50010, <AZ-3-dn-2>:50010, <AZ-2-dn-2>:50010 for /hbase/WALs/<WAL-file>
> 2023-12-16 17:17:44,369 INFO [Thread-39087] hdfs.DataStreamer - Exception in createBlockOutputStream
> java.io.IOException: Got error, status=ERROR, status message , ack with firstBadLink as <AZ-2-dn-2>:50010
>     at org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.checkBlockOpStatus(DataTransferProtoUtil.java:113)
>     at org.apache.hadoop.hdfs.DataStreamer.createBlockOutputStream(DataStreamer.java:1747)
>     at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1651)
>     at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:715)
> 2023-12-16 17:17:44,369 WARN [Thread-39087] hdfs.DataStreamer - Abandoning BP-179318874-<NN-IP>-1594838129323:blk_1214652580_140946764
> 2023-12-16 17:17:44,454 WARN [Thread-39087] hdfs.DataStreamer - Excluding datanode DatanodeInfoWithStorage[<AZ-2-dn-2>:50010,DS-46bb45cc-af89-46f3-9f9d-24e4fdc35b6d,DISK]
> 2023-12-16 17:17:44,522 INFO [on default port 9000] hdfs.StateChange - BLOCK* allocate blk_1214652594_140946796, replicas=<AZ-1-dn-2>:50010, <AZ-2-dn-3>:50010, <AZ-3-dn-3>:50010 for /hbase/WALs/<WAL-file>
> 2023-12-16 17:17:44,712 INFO [Thread-39087] hdfs.DataStreamer - Exception in createBlockOutputStream
> java.io.IOException: Got error, status=ERROR, status message , ack with firstBadLink as <AZ-2-dn-3>:50010
>     at org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.checkBlockOpStatus(DataTransferProtoUtil.java:113)
>     at org.apache.hadoop.hdfs.DataStreamer.createBlockOutputStream(DataStreamer.java:1747)
>     at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1651)
>     at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:715)
> 2023-12-16 17:17:44,712 WARN [Thread-39087] hdfs.DataStreamer - Abandoning BP-179318874-<NN-IP>-1594838129323:blk_1214652594_140946796
> 2023-12-16 17:17:44,732 WARN [Thread-39087] hdfs.DataStreamer - Excluding datanode DatanodeInfoWithStorage[<AZ-2-dn-3>:50010,DS-86b77463-a26f-4f42-ae1b-21b75c407203,DISK]
> 2023-12-16 17:17:44,855 INFO [on default port 9000] hdfs.StateChange - BLOCK* allocate blk_1214652607_140946850, replicas=<AZ-1-dn-4>:50010, <AZ-2-dn-4>:50010, <AZ-3-dn-4>:50010 for /hbase/WALs/<WAL-file>
> 2023-12-16 17:17:44,867 INFO [Thread-39087] hdfs.DataStreamer - Exception in createBlockOutputStream
> java.io.IOException: Got error, status=ERROR, status message , ack with firstBadLink as <AZ-2-dn-4>:50010
>     at org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.checkBlockOpStatus(DataTransferProtoUtil.java:113)
>     at org.apache.hadoop.hdfs.DataStreamer.createBlockOutputStream(DataStreamer.java:1747)
>     at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1651)
>     at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:715)
> 2023-12-16 17:17:44,988 WARN [Thread-39087] hdfs.DataStreamer - DataStreamer Exception
> java.io.IOException: Unable to create new block.
>     at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1665)
>     at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:715)
> 2023-12-16 17:17:44,988 WARN [Thread-39087] hdfs.DataStreamer - Could not get block locations. Source file "/hbase/WALs/<WAL-file>" - Aborting...
> {noformat}
>
> *Proposed fix:*
> The client always correctly identifies the bad datanode in the pipeline.
> The number of retries the dfs client makes is controlled by dfs.client.block.write.retries (defaults to 3), so in total it tries 4 times to create the pipeline.
> On the 3rd or 4th attempt, if we see that all the excluded nodes in the pipeline belong to the same rack, we can pass a hint to the namenode to exclude that rack for the next attempt.
> Once that rack is back online, the Replication Monitor will handle replicating that block to that rack.
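For illustration, the rack-matching check at the heart of the proposed fix could look roughly like the sketch below. This is a minimal sketch only, not the actual patch: the helper name, its placement, and returning the rack string as the exclusion hint are all assumptions; only DatanodeInfo#getNetworkLocation() is an existing HDFS API.

{code:java}
import java.util.List;

import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

/**
 * Illustrative helper: after repeated pipeline-setup failures, decide
 * whether every datanode excluded so far sits on the same rack. If so,
 * that rack's network location could be passed to the namenode as an
 * "avoid this rack" hint on the next block allocation attempt.
 */
final class RackExclusionHint {

  private RackExclusionHint() {
  }

  /**
   * @param excluded    datanodes excluded in earlier pipeline attempts
   * @param attempt     current attempt number, 1-based
   * @param maxAttempts dfs.client.block.write.retries + 1 (4 by default)
   * @return the common rack to exclude, or null if no hint applies
   */
  static String rackToExclude(List<DatanodeInfo> excluded,
      int attempt, int maxAttempts) {
    // Only escalate on the last two attempts, per the proposal above.
    if (attempt < maxAttempts - 1 || excluded.isEmpty()) {
      return null;
    }
    String rack = excluded.get(0).getNetworkLocation();
    for (DatanodeInfo dn : excluded) {
      if (!rack.equals(dn.getNetworkLocation())) {
        return null; // failures span racks; no single rack to blame
      }
    }
    return rack; // e.g. "/AZ-2": ask the NN to avoid this rack once
  }
}
{code}

If the namenode honors such a hint, the next pipeline can be built from the remaining racks, and, as the description notes, the Replication Monitor restores the missing replica once the excluded rack comes back online.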