errose28 commented on code in PR #8893:
URL: https://github.com/apache/ozone/pull/8893#discussion_r2285991128
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java:
##########
@@ -607,8 +607,13 @@ ContainerCommandResponseProto handleCloseContainer(
return malformedRequest(request);
}
try {
+ ContainerProtos.ContainerDataProto.State currentState =
kvContainer.getContainerState();
markContainerForClose(kvContainer);
closeContainer(kvContainer);
+ if (currentState == RECOVERING) {
+ // trigger container scan for recovering containers, i.e., after EC
reconstruction
+
containerSet.scanContainer(kvContainer.getContainerData().getContainerID(), "EC
Reconstruction");
+ }
Review Comment:
Just a minor name/comment change to reflect the state of the container at
the time we are checking it.
```suggestion
ContainerProtos.ContainerDataProto.State previousState =
kvContainer.getContainerState();
markContainerForClose(kvContainer);
closeContainer(kvContainer);
if (previousState == RECOVERING) {
// trigger container scan for recovered containers after EC
reconstruction
containerSet.scanContainer(kvContainer.getContainerData().getContainerID(), "EC
Reconstruction");
}
```
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java:
##########
@@ -392,6 +393,45 @@ public void
testCloseInvalidContainer(ContainerLayoutVersion layoutVersion)
"Close container should return Invalid container error");
}
+ @ContainerLayoutTestInfo.ContainerTest
+ public void testCloseRecoveringContainerTriggersScan(ContainerLayoutVersion
layoutVersion) {
+ final ContainerSet containerSet = spy(newContainerSet());
Review Comment:
We don't need a new spy here. There's a `mockContainerSet` field in this
test already. The test still passes when we use that.
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java:
##########
@@ -392,6 +393,45 @@ public void
testCloseInvalidContainer(ContainerLayoutVersion layoutVersion)
"Close container should return Invalid container error");
}
+ @ContainerLayoutTestInfo.ContainerTest
+ public void testCloseRecoveringContainerTriggersScan(ContainerLayoutVersion
layoutVersion) {
+ final ContainerSet containerSet = spy(newContainerSet());
+ final KeyValueHandler keyValueHandler = new KeyValueHandler(conf,
+ DATANODE_UUID, containerSet, mock(MutableVolumeSet.class),
mock(ContainerMetrics.class),
+ c -> { }, new ContainerChecksumTreeManager(conf));
+
+ conf = new OzoneConfiguration();
+ KeyValueContainerData kvData = new
KeyValueContainerData(DUMMY_CONTAINER_ID,
+ layoutVersion,
+ (long) StorageUnit.GB.toBytes(1), UUID.randomUUID().toString(),
+ UUID.randomUUID().toString());
+ kvData.setMetadataPath(tempDir.toString());
+ kvData.setDbFile(dbFile.toFile());
+ KeyValueContainer container = new KeyValueContainer(kvData, conf);
+ ContainerCommandRequestProto createContainerRequest =
+ createContainerRequest(DATANODE_UUID, DUMMY_CONTAINER_ID);
+ keyValueHandler.handleCreateContainer(createContainerRequest, container);
+
+ // Make the container state as invalid.
+ kvData.setState(State.RECOVERING);
+
+ // Create Close container request
+ ContainerCommandRequestProto closeContainerRequest =
+ ContainerProtos.ContainerCommandRequestProto.newBuilder()
+ .setCmdType(ContainerProtos.Type.CloseContainer)
+ .setContainerID(DUMMY_CONTAINER_ID)
+ .setDatanodeUuid(DATANODE_UUID)
+ .setCloseContainer(ContainerProtos.CloseContainerRequestProto
+ .getDefaultInstance())
+ .build();
+ dispatcher.dispatch(closeContainerRequest, null);
+
+ keyValueHandler.handleCloseContainer(closeContainerRequest, container);
+
+ verify(keyValueHandler.getContainerSet(), atLeastOnce())
+ .scanContainer(DUMMY_CONTAINER_ID, "EC Reconstruction");
Review Comment:
This should do the same thing but we can get rid of the testing getter in
`Handler`.
```suggestion
verify(containerSet, atLeastOnce()).scanContainer(DUMMY_CONTAINER_ID,
"EC Reconstruction");
```
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java:
##########
Review Comment:
Are changes to this file still necessary now that the EC reconstruction
coordinator is not changing?
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java:
##########
@@ -305,6 +310,30 @@ public void
testContainerReconciliationWithPeerFailure(FailureLocation failureLo
mockContainerProtocolCalls();
}
+ @Test
+ public void testContainerReconciliationFailureContainerScan()
+ throws Exception {
+ // Use synchronous on-demand scans to re-build the merkle trees after
corruption.
+ datanodes.forEach(d -> d.scanContainer(CONTAINER_ID));
+
+ // Each datanode should have had one on-demand scan during test setup, and
a second one after corruption was
+ // introduced.
+ waitForExpectedScanCount(1);
+
+ for (MockDatanode current : datanodes) {
+
doThrow(IOException.class).when(current.getHandler().getChecksumManager()).read(any());
+ List<DatanodeDetails> peers = datanodes.stream()
+ .map(MockDatanode::getDnDetails)
+ .filter(other -> !current.getDnDetails().equals(other))
+ .collect(Collectors.toList());
+ // Reconciliation should fail for each datanode, since the checksum info
cannot be retrieved.
+ assertThrows(IOException.class, () ->
current.reconcileContainer(dnClient, peers, CONTAINER_ID));
+ Mockito.reset(current.getHandler().getChecksumManager());
+ }
+ // Even failure of Reconciliation should have triggered a second on-demand
scan for each replica.
+ waitForExpectedScanCount(2);
+ }
Review Comment:
This version of the test also passes, but does not require extra changes to
accommodate the spy.
```suggestion
@Test
public void testContainerReconciliationFailureContainerScan() throws
Exception {
for (MockDatanode current : datanodes) {
List<DatanodeDetails> peers = datanodes.stream()
.map(MockDatanode::getDnDetails)
.filter(other -> !current.getDnDetails().equals(other))
.collect(Collectors.toList());
FileUtils.deleteDirectory(new
File(current.getContainer(CONTAINER_ID).getContainerData().getMetadataPath()));
// Reconciliation should fail without a metadata directory present.
assertThrows(IOException.class, () ->
current.reconcileContainer(dnClient, peers, CONTAINER_ID));
Mockito.reset(current.getHandler().getChecksumManager());
}
// Each replica should have an on-demand scan triggered when
reconciliation failed.
waitForExpectedScanCount(1);
datanodes.forEach(d ->
assertTrue(d.getContainer(CONTAINER_ID).getContainerData().isUnhealthy()));
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]