This is an automated email from the ASF dual-hosted git repository.
siddhant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 0f5de57443 HDDS-10042. Show IDs of under-replicated and unclosed
containers for decommissioning nodes (#5929)
0f5de57443 is described below
commit 0f5de574434d18f40ca7e3a56d201758180be79a
Author: Tejaskriya <[email protected]>
AuthorDate: Wed Jan 24 11:15:11 2024 +0530
HDDS-10042. Show IDs of under-replicated and unclosed containers for
decommissioning nodes (#5929)
---
.../apache/hadoop/hdds/scm/client/ScmClient.java | 10 +++++
.../protocol/StorageContainerLocationProtocol.java | 9 ++++
...inerLocationProtocolClientSideTranslatorPB.java | 21 +++++++++
.../src/main/proto/ScmAdminProtocol.proto | 16 +++++++
.../hadoop/hdds/scm/node/DatanodeAdminMonitor.java | 6 +++
.../hdds/scm/node/DatanodeAdminMonitorImpl.java | 35 +++++++++++----
.../hdds/scm/node/NodeDecommissionManager.java | 7 +++
...inerLocationProtocolServerSideTranslatorPB.java | 26 +++++++++++
.../hdds/scm/server/SCMClientProtocolServer.java | 9 ++++
.../hdds/scm/node/TestDatanodeAdminMonitor.java | 44 +++++++++++++++++++
.../hdds/scm/cli/ContainerOperationClient.java | 5 +++
.../cli/datanode/DecommissionStatusSubCommand.java | 4 ++
.../datanode/TestDecommissionStatusSubCommand.java | 50 ++++++++++++++++++++++
13 files changed, 233 insertions(+), 9 deletions(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
index 120535405e..402398e36c 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
@@ -20,10 +20,12 @@ package org.apache.hadoop.hdds.scm.client;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.annotation.InterfaceStability;
import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerReplicaInfo;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
import
org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
@@ -179,6 +181,14 @@ public interface ScmClient extends Closeable {
HddsProtos.ReplicationFactor replicationFactor,
String owner) throws IOException;
+ /**
+ * Gets the list of underReplicated and unClosed containers on a
decommissioning node.
+ *
+ * @param dn - Datanode detail
+ * @return Lists of underReplicated and unClosed containers
+ */
+ Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn)
throws IOException;
+
/**
* Returns a set of Nodes that meet a query criteria. Passing null for
opState
* or nodeState acts like a wild card, returning all nodes in that state.
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
index be0f41b622..dabdc0b822 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.protocol;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
@@ -220,6 +221,14 @@ public interface StorageContainerLocationProtocol extends
Closeable {
*/
void deleteContainer(long containerID) throws IOException;
+ /**
+ * Gets the list of underReplicated and unClosed containers on a
decommissioning node.
+ *
+ * @param dn - Datanode detail
+ * @return Lists of underReplicated and unClosed containers
+ */
+ Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn)
throws IOException;
+
/**
* Queries a list of Node Statuses. Passing a null for either opState or
* state acts like a wildcard returning all nodes in that state.
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
index eb3f419e48..84a0fa4886 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.ReplicatedReplicationConfig;
import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.GetScmInfoResponseProto;
@@ -55,6 +56,9 @@ import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolPro
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerReplicasRequestProto;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainersOnDecomNodeProto;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerTokenRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerTokenResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineBatchRequestProto;
@@ -459,6 +463,23 @@ public final class
StorageContainerLocationProtocolClientSideTranslatorPB
}
+ @Override
+ public Map<String, List<ContainerID>>
getContainersOnDecomNode(DatanodeDetails dn) throws IOException {
+ GetContainersOnDecomNodeRequestProto request =
GetContainersOnDecomNodeRequestProto.newBuilder()
+ .setDatanodeDetails(dn.getProtoBufMessage()).build();
+ GetContainersOnDecomNodeResponseProto response =
submitRequest(Type.GetContainersOnDecomNode,
+ builder ->
builder.setGetContainersOnDecomNodeRequest(request)).getGetContainersOnDecomNodeResponse();
+ Map<String, List<ContainerID>> containerMap = new HashMap<>();
+ for (ContainersOnDecomNodeProto containersProto :
response.getContainersOnDecomNodeList()) {
+ List<ContainerID> containerIds = new ArrayList<>();
+ for (HddsProtos.ContainerID id : containersProto.getIdList()) {
+ containerIds.add(ContainerID.getFromProtobuf(id));
+ }
+ containerMap.put(containersProto.getName(), containerIds);
+ }
+ return containerMap;
+ }
+
/**
* Queries a list of Nodes based on their operational state or health state.
* Passing a null for either value acts as a wildcard for that state.
diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
index 6cfddcc2f6..6adca817ed 100644
--- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
+++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
@@ -82,6 +82,7 @@ message ScmContainerLocationRequest {
optional GetFailedDeletedBlocksTxnRequestProto
getFailedDeletedBlocksTxnRequest = 43;
optional DecommissionScmRequestProto decommissionScmRequest = 44;
optional SingleNodeQueryRequestProto singleNodeQueryRequest = 45;
+ optional GetContainersOnDecomNodeRequestProto
getContainersOnDecomNodeRequest = 46;
}
message ScmContainerLocationResponse {
@@ -135,6 +136,7 @@ message ScmContainerLocationResponse {
optional GetFailedDeletedBlocksTxnResponseProto
getFailedDeletedBlocksTxnResponse = 43;
optional DecommissionScmResponseProto decommissionScmResponse = 44;
optional SingleNodeQueryResponseProto singleNodeQueryResponse = 45;
+ optional GetContainersOnDecomNodeResponseProto
getContainersOnDecomNodeResponse = 46;
enum Status {
OK = 1;
@@ -187,6 +189,7 @@ enum Type {
GetFailedDeletedBlocksTransaction = 39;
DecommissionScm = 40;
SingleNodeQuery = 41;
+ GetContainersOnDecomNode = 42;
}
/**
@@ -602,6 +605,19 @@ message DecommissionScmResponseProto {
optional string errorMsg = 2;
}
+message GetContainersOnDecomNodeRequestProto {
+ required DatanodeDetailsProto datanodeDetails = 1;
+}
+
+message ContainersOnDecomNodeProto {
+ required string name = 1;
+ repeated ContainerID id = 2;
+}
+
+message GetContainersOnDecomNodeResponseProto {
+ repeated ContainersOnDecomNodeProto containersOnDecomNode = 1;
+}
+
/**
* Protocol used from an HDFS node to StorageContainerManager. See the request
* and response messages for details of the RPC calls.
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
index e0b4c3ce54..fbfbb49c25 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
@@ -18,7 +18,11 @@
package org.apache.hadoop.hdds.scm.node;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
/**
@@ -31,4 +35,6 @@ public interface DatanodeAdminMonitor extends Runnable {
void stopMonitoring(DatanodeDetails dn);
Set<DatanodeAdminMonitorImpl.TrackedNode> getTrackedNodes();
void setMetrics(NodeDecommissionMetrics metrics);
+ Map<String, List<ContainerID>> getContainersReplicatedOnNode(DatanodeDetails
dn)
+ throws NodeNotFoundException;
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
index 51c6d12dea..d7975ff1e5 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
@@ -96,8 +96,8 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
public static final class TrackedNode {
private DatanodeDetails datanodeDetails;
-
private long startTime = 0L;
+ private Map<String, List<ContainerID>> containersReplicatedOnNode = new
ConcurrentHashMap<>();
public TrackedNode(DatanodeDetails datanodeDetails, long startTime) {
this.datanodeDetails = datanodeDetails;
@@ -122,6 +122,15 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
public long getStartTime() {
return startTime;
}
+
+ public Map<String, List<ContainerID>> getContainersReplicatedOnNode() {
+ return containersReplicatedOnNode;
+ }
+
+ public void setContainersReplicatedOnNode(List<ContainerID>
underReplicated, List<ContainerID> unClosed) {
+ this.containersReplicatedOnNode.put("UnderReplicated",
Collections.unmodifiableList(underReplicated));
+ this.containersReplicatedOnNode.put("UnClosed",
Collections.unmodifiableList(unClosed));
+ }
}
private Map<String, ContainerStateInWorkflow> containerStateByHost;
@@ -423,9 +432,7 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
boolean isHealthy = replicaSet.isHealthyEnoughForOffline();
if (!isHealthy) {
- if (LOG.isDebugEnabled()) {
- unClosedIDs.add(cid);
- }
+ unClosedIDs.add(cid);
if (unclosed < containerDetailsLoggingLimit
|| LOG.isDebugEnabled()) {
LOG.info("Unclosed Container {} {}; {}", cid, replicaSet,
replicaDetails(replicaSet.getReplicas()));
@@ -448,20 +455,18 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
replicationManager.checkContainerStatus(replicaSet.getContainer(),
report);
replicatedOK =
report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED) == 0;
}
-
if (replicatedOK) {
sufficientlyReplicated++;
} else {
- if (LOG.isDebugEnabled()) {
- underReplicatedIDs.add(cid);
- }
+ underReplicatedIDs.add(cid);
if (underReplicated < containerDetailsLoggingLimit ||
LOG.isDebugEnabled()) {
LOG.info("Under Replicated Container {} {}; {}", cid, replicaSet,
replicaDetails(replicaSet.getReplicas()));
}
underReplicated++;
}
} catch (ContainerNotFoundException e) {
- LOG.warn("ContainerID {} present in node list for {} but not found in
containerManager", cid, dn);
+ LOG.warn("ContainerID {} present in node list for {} but not found in
containerManager", cid,
+ dn.getDatanodeDetails());
}
}
LOG.info("{} has {} sufficientlyReplicated, {} deleting, {} " +
@@ -485,9 +490,21 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
unclosed, unClosedIDs.stream().map(
Object::toString).collect(Collectors.joining(", ")));
}
+ dn.setContainersReplicatedOnNode(underReplicatedIDs, unClosedIDs);
return underReplicated == 0 && unclosed == 0;
}
+ public Map<String, List<ContainerID>>
getContainersReplicatedOnNode(DatanodeDetails dn) {
+ Iterator<TrackedNode> iterator = trackedNodes.iterator();
+ while (iterator.hasNext()) {
+ TrackedNode trackedNode = iterator.next();
+ if (trackedNode.equals(new TrackedNode(dn, 0L))) {
+ return trackedNode.getContainersReplicatedOnNode();
+ }
+ }
+ return new HashMap<>();
+ }
+
private String replicaDetails(Collection<ContainerReplica> replicas) {
StringBuilder sb = new StringBuilder();
sb.append("Replicas{");
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index c98cc63c46..38e59b89e7 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
@@ -40,6 +41,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
@@ -292,6 +294,11 @@ public class NodeDecommissionManager {
TimeUnit.SECONDS);
}
+ public Map<String, List<ContainerID>>
getContainersReplicatedOnNode(DatanodeDetails dn)
+ throws NodeNotFoundException {
+ return getMonitor().getContainersReplicatedOnNode(dn);
+ }
+
@VisibleForTesting
public DatanodeAdminMonitor getMonitor() {
return monitor;
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
index 6d47a78a7d..f402b9309f 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
@@ -24,6 +24,7 @@ import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipResponseProto;
@@ -51,6 +52,9 @@ import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolPro
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerReplicasRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerReplicasResponseProto;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainersOnDecomNodeProto;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerTokenRequestProto;
@@ -614,6 +618,12 @@ public final class
StorageContainerLocationProtocolServerSideTranslatorPB
.setDecommissionNodesResponse(decommissionNodes(
request.getDecommissionNodesRequest()))
.build();
+ case GetContainersOnDecomNode:
+ return ScmContainerLocationResponse.newBuilder()
+ .setCmdType(request.getCmdType())
+ .setStatus(Status.OK)
+
.setGetContainersOnDecomNodeResponse(getContainersOnDecomNode(request.getGetContainersOnDecomNodeRequest()))
+ .build();
case RecommissionNodes:
return ScmContainerLocationResponse.newBuilder()
.setCmdType(request.getCmdType())
@@ -1160,6 +1170,22 @@ public final class
StorageContainerLocationProtocolServerSideTranslatorPB
return response.build();
}
+ public GetContainersOnDecomNodeResponseProto
getContainersOnDecomNode(GetContainersOnDecomNodeRequestProto request)
+ throws IOException {
+ Map<String, List<ContainerID>> containerMap =
impl.getContainersOnDecomNode(
+ DatanodeDetails.getFromProtoBuf(request.getDatanodeDetails()));
+ List<ContainersOnDecomNodeProto> containersProtoList = new ArrayList<>();
+ for (Map.Entry<String, List<ContainerID>> containerList :
containerMap.entrySet()) {
+ List<HddsProtos.ContainerID> containerIdsProto = new ArrayList<>();
+ for (ContainerID id : containerList.getValue()) {
+ containerIdsProto.add(id.getProtobuf());
+ }
+
containersProtoList.add(ContainersOnDecomNodeProto.newBuilder().setName(containerList.getKey())
+ .addAllId(containerIdsProto).build());
+ }
+ return
GetContainersOnDecomNodeResponseProto.newBuilder().addAllContainersOnDecomNode(containersProtoList).build();
+ }
+
public RecommissionNodesResponseProto recommissionNodes(
RecommissionNodesRequestProto request) throws IOException {
List<DatanodeAdminError> errors =
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
index ac92ea893d..13bef8590b 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
@@ -588,6 +588,15 @@ public class SCMClientProtocolServer implements
}
}
+ @Override
+ public Map<String, List<ContainerID>>
getContainersOnDecomNode(DatanodeDetails dn) throws IOException {
+ try {
+ return scm.getScmDecommissionManager().getContainersReplicatedOnNode(dn);
+ } catch (NodeNotFoundException e) {
+ throw new IOException("Failed to get containers list. Unable to find
required node", e);
+ }
+ }
+
@Override
public List<HddsProtos.Node> queryNode(
HddsProtos.NodeOperationalState opState, HddsProtos.NodeState state,
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
index a2df04742f..f4002a7da1 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
@@ -836,6 +836,50 @@ public class TestDatanodeAdminMonitor {
nodeManager.getNodeStatus(dn1).getOperationalState());
}
+ @Test
+ public void testContainersReplicatedOnDecomDnAPI()
+ throws NodeNotFoundException, ContainerNotFoundException {
+ conf.setBoolean("hdds.scm.replication.enable.legacy", false);
+
+ DatanodeDetails dn1 = MockDatanodeDetails.randomDatanodeDetails();
+ nodeManager.register(dn1,
+ new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+ HddsProtos.NodeState.HEALTHY));
+
+ Set<ContainerID> containers = new HashSet<>();
+ containers.add(ContainerID.valueOf(1));
+ containers.add(ContainerID.valueOf(2));
+ nodeManager.setContainers(dn1, containers);
+ DatanodeAdminMonitorTestUtil
+ .mockGetContainerReplicaCount(repManager,
+ true,
+ HddsProtos.LifeCycleState.CLOSED,
+ DECOMMISSIONING,
+ IN_SERVICE,
+ IN_SERVICE);
+
+ monitor.startMonitoring(dn1);
+ monitor.run();
+ assertEquals(1, monitor.getTrackedNodeCount());
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+ nodeManager.getNodeStatus(dn1).getOperationalState());
+
assertEquals(monitor.getContainersReplicatedOnNode(dn1).get("UnderReplicated").size(),
2);
+
assertEquals(monitor.getContainersReplicatedOnNode(dn1).get("UnClosed").size(),
0);
+
+ DatanodeAdminMonitorTestUtil
+ .mockGetContainerReplicaCount(repManager,
+ true,
+ HddsProtos.LifeCycleState.OPEN,
+ IN_SERVICE);
+
+ monitor.run();
+ assertEquals(1, monitor.getTrackedNodeCount());
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+ nodeManager.getNodeStatus(dn1).getOperationalState());
+
assertEquals(monitor.getContainersReplicatedOnNode(dn1).get("UnderReplicated").size(),
0);
+
assertEquals(monitor.getContainersReplicatedOnNode(dn1).get("UnClosed").size(),
2);
+ }
+
/**
* Generate a set of ContainerID, starting from an ID of zero up to the given
* count minus 1.
diff --git
a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
index 1daffbb9b9..d07e696e7e 100644
---
a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
+++
b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
@@ -216,6 +216,11 @@ public class ContainerOperationClient implements ScmClient
{
}
}
+ @Override
+ public Map<String, List<ContainerID>>
getContainersOnDecomNode(DatanodeDetails dn) throws IOException {
+ return storageContainerLocationClient.getContainersOnDecomNode(dn);
+ }
+
@Override
public List<HddsProtos.Node> queryNode(
HddsProtos.NodeOperationalState opState,
diff --git
a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
index bbf1d84076..b53632f8ee 100644
---
a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
+++
b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
@@ -23,10 +23,12 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
import picocli.CommandLine;
import java.io.IOException;
import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -83,6 +85,8 @@ public class DecommissionStatusSubCommand extends
ScmSubcommand {
DatanodeDetails datanode = DatanodeDetails.getFromProtoBuf(
node.getNodeID());
printDetails(datanode);
+ Map<String, List<ContainerID>> containers =
scmClient.getContainersOnDecomNode(datanode);
+ System.out.println(containers);
}
}
private void printDetails(DatanodeDetails datanode) {
diff --git
a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
index 902ee5e7a8..ed05d6f0e9 100644
---
a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
+++
b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
@@ -17,8 +17,10 @@
*/
package org.apache.hadoop.hdds.scm.cli.datanode;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -29,7 +31,9 @@ import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -55,6 +59,7 @@ public class TestDecommissionStatusSubCommand {
private final PrintStream originalErr = System.err;
private DecommissionStatusSubCommand cmd;
private List<HddsProtos.Node> nodes = getNodeDetails(2);
+ private Map<String, List<ContainerID>> containerOnDecom =
getContainersOnDecomNodes();
@BeforeEach
public void setup() throws UnsupportedEncodingException {
@@ -74,6 +79,7 @@ public class TestDecommissionStatusSubCommand {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.queryNode(any(), any(), any(), any()))
.thenAnswer(invocation -> nodes); // 2 nodes decommissioning
+
when(scmClient.getContainersOnDecomNode(any())).thenReturn(containerOnDecom);
cmd.execute(scmClient);
Pattern p = Pattern.compile("Decommission\\sStatus:\\s" +
@@ -85,9 +91,15 @@ public class TestDecommissionStatusSubCommand {
p = Pattern.compile("Datanode:\\s.*host0\\)");
m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertTrue(m.find());
+ p = Pattern.compile("host0.*[\r\n].*UnderReplicated.*UnClosed",
Pattern.MULTILINE);
+ m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+ assertTrue(m.find());
p = Pattern.compile("Datanode:\\s.*host1\\)");
m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertTrue(m.find());
+ p = Pattern.compile("host1.*[\r\n].*UnderReplicated.*UnClosed",
Pattern.MULTILINE);
+ m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+ assertTrue(m.find());
}
@Test
@@ -96,6 +108,7 @@ public class TestDecommissionStatusSubCommand {
// No nodes in decommissioning. No error is printed
when(scmClient.queryNode(any(), any(), any(), any()))
.thenReturn(new ArrayList<>());
+ when(scmClient.getContainersOnDecomNode(any())).thenReturn(new
HashMap<>());
cmd.execute(scmClient);
Pattern p = Pattern.compile("Decommission\\sStatus:\\s" +
@@ -117,6 +130,7 @@ public class TestDecommissionStatusSubCommand {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.queryNode(any(), any(), any(), any()))
.thenAnswer(invocation -> nodes); // 2 nodes decommissioning
+
when(scmClient.getContainersOnDecomNode(any())).thenReturn(containerOnDecom);
CommandLine c = new CommandLine(cmd);
c.parseArgs("--id", nodes.get(0).getNodeID().getUuid());
@@ -125,11 +139,17 @@ public class TestDecommissionStatusSubCommand {
Pattern p = Pattern.compile("Datanode:\\s.*host0\\)", Pattern.MULTILINE);
Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertTrue(m.find());
+ p = Pattern.compile("host0.*[\r\n].*UnderReplicated.*UnClosed",
Pattern.MULTILINE);
+ m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+ assertTrue(m.find());
// as uuid of only host0 is passed, host1 should NOT be displayed
p = Pattern.compile("Datanode:\\s.*host1.\\)", Pattern.MULTILINE);
m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertFalse(m.find());
+ p = Pattern.compile("host1.*[\r\n].*UnderReplicated.*UnClosed",
Pattern.MULTILINE);
+ m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+ assertFalse(m.find());
}
@Test
@@ -137,6 +157,10 @@ public class TestDecommissionStatusSubCommand {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.queryNode(any(), any(), any(), any()))
.thenAnswer(invocation -> nodes.subList(0, 1)); // host0
decommissioning
+
when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(0).getNodeID())))
+ .thenReturn(containerOnDecom);
+
when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(1).getNodeID())))
+ .thenReturn(new HashMap<>());
CommandLine c = new CommandLine(cmd);
c.parseArgs("--id", nodes.get(1).getNodeID().getUuid());
@@ -161,6 +185,7 @@ public class TestDecommissionStatusSubCommand {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.queryNode(any(), any(), any(), any()))
.thenAnswer(invocation -> nodes); // 2 nodes decommissioning
+
when(scmClient.getContainersOnDecomNode(any())).thenReturn(containerOnDecom);
CommandLine c = new CommandLine(cmd);
c.parseArgs("--ip", nodes.get(1).getNodeID().getIpAddress());
@@ -169,11 +194,17 @@ public class TestDecommissionStatusSubCommand {
Pattern p = Pattern.compile("Datanode:\\s.*host1\\)", Pattern.MULTILINE);
Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertTrue(m.find());
+ p = Pattern.compile("host1.*[\r\n].*UnderReplicated.*UnClosed",
Pattern.MULTILINE);
+ m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+ assertTrue(m.find());
// as IpAddress of only host1 is passed, host0 should NOT be displayed
p = Pattern.compile("Datanode:\\s.*host0.\\)", Pattern.MULTILINE);
m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertFalse(m.find());
+ p = Pattern.compile("host0.*[\r\n].*UnderReplicated.*UnClosed",
Pattern.MULTILINE);
+ m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+ assertFalse(m.find());
}
@Test
@@ -181,6 +212,10 @@ public class TestDecommissionStatusSubCommand {
ScmClient scmClient = mock(ScmClient.class);
when(scmClient.queryNode(any(), any(), any(), any()))
.thenAnswer(invocation -> nodes.subList(0, 1)); // host0
decommissioning
+
when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(0).getNodeID())))
+ .thenReturn(containerOnDecom);
+
when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(1).getNodeID())))
+ .thenReturn(new HashMap<>());
CommandLine c = new CommandLine(cmd);
c.parseArgs("--ip", nodes.get(1).getNodeID().getIpAddress());
@@ -225,4 +260,19 @@ public class TestDecommissionStatusSubCommand {
return nodesList;
}
+ private Map<String, List<ContainerID>> getContainersOnDecomNodes() {
+ Map<String, List<ContainerID>> containerMap = new HashMap<>();
+ List<ContainerID> underReplicated = new ArrayList<>();
+ underReplicated.add(new ContainerID(1L));
+ underReplicated.add(new ContainerID(2L));
+ underReplicated.add(new ContainerID(3L));
+ containerMap.put("UnderReplicated", underReplicated);
+ List<ContainerID> unclosed = new ArrayList<>();
+ unclosed.add(new ContainerID(10L));
+ unclosed.add(new ContainerID(11L));
+ unclosed.add(new ContainerID(12L));
+ containerMap.put("UnClosed", unclosed);
+ return containerMap;
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]