xiaoyuyao commented on issue #291: HDDS-1574 Average out pipeline allocation on datanodes and add metrcs/test URL: https://github.com/apache/hadoop-ozone/pull/291#issuecomment-566098303 Thanks @timmylicheng for the update. The latest change LGTM. Regarding the failures in testCloseContainerEventWithRatis: I spent some time debugging them. Now that we allow the background pipeline thread to create more than one pipeline per datanode, OZONE_SCM_PIPELINE_NUMBER_LIMIT should always be set properly in the test. Otherwise, the background pipeline creation will run indefinitely and time out the event queue processing for some of those with a tight timeout (e.g., 1s in this case). Also, when the limit is reached, we throw exceptions, which results in a lot of false-alarm error logs and increments the pipeline failure counter. We can fix that in follow-up JIRAs. Here is a proposed fix for testCloseContainerEventWithRatis, which you can include in this PR. ``` TestCloseContainerEventHandler.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.pipeline.MockRatisPipelineProvider; import org.apache.hadoop.hdds.scm.pipeline.PipelineProvider; @@ -67,22 +68,25 @@ public static void setUp() throws Exception { .getTestDir(TestCloseContainerEventHandler.class.getSimpleName()); configuration .set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); + configuration.setInt( + ScmConfigKeys.OZONE_SCM_PIPELINE_NUMBER_LIMIT, 16); + nodeManager = new MockNodeManager(true, 10); eventQueue = new EventQueue(); pipelineManager = new SCMPipelineManager(configuration, nodeManager, eventQueue); PipelineProvider mockRatisProvider = new MockRatisPipelineProvider(nodeManager, - pipelineManager.getStateManager(), configuration); + pipelineManager.getStateManager(), 
configuration, eventQueue); pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, mockRatisProvider); containerManager = new SCMContainerManager(configuration, nodeManager, pipelineManager, new EventQueue()); - pipelineManager.triggerPipelineCreation(); eventQueue.addHandler(CLOSE_CONTAINER, new CloseContainerEventHandler(pipelineManager, containerManager)); eventQueue.addHandler(DATANODE_COMMAND, nodeManager); + pipelineManager.triggerPipelineCreation(); // Move all pipelines created by background from ALLOCATED to OPEN state Thread.sleep(2000); TestUtils.openAllRatisPipelines(pipelineManager); @@ -93,6 +97,9 @@ public static void tearDown() throws Exception { if (containerManager != null) { containerManager.close(); } + if (pipelineManager != null) { + pipelineManager.close(); + } FileUtil.fullyDelete(testDir); } ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
