errose28 commented on code in PR #3741:
URL: https://github.com/apache/ozone/pull/3741#discussion_r1036615240
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java:
##########
@@ -88,6 +130,67 @@ public void testStartup() throws IOException {
     service.close();
   }

+  @ParameterizedTest
+  @ValueSource(strings = {OzoneConsts.SCHEMA_V1,
+      OzoneConsts.SCHEMA_V2, OzoneConsts.SCHEMA_V3})
+  public void testTmpDirOnStartup(String schemaVersion) throws IOException {
+    ContainerTestVersionInfo.setTestSchemaVersion(schemaVersion, conf);
+    LOG.info("SchemaV3_enabled: " +
+        conf.get(DatanodeConfiguration.CONTAINER_SCHEMA_V3_ENABLED));
+    service.start(conf);
+
+    // Get volumeSet and store volumes in temp folders
+    // in order to access them after service.stop()
+    MutableVolumeSet volumeSet = service
+        .getDatanodeStateMachine().getContainer().getVolumeSet();
+    List<HddsVolume> volumes = StorageVolumeUtil.getHddsVolumesList(
+        volumeSet.getVolumesList());
+    int volumeSetSize = volumes.size();
+    File[] tempHddsVolumes = new File[volumeSetSize];
+
+    for (int i = 0; i < volumeSetSize; i++) {
+      HddsVolume volume = volumes.get(i);
+      volume.format(clusterId);
+      volume.createWorkingDir(clusterId, null);
+      volume.createDeleteServiceDir();
+      if (this.tempDir.isDirectory()) {
+        tempHddsVolumes[i] = tempDir;
+      }
+
+      // Write to tmp dir under volume
+      File testFile = new File(volume.getDeleteServiceDirPath() +
+          FILE_SEPARATOR + "testFile.txt");
+      Files.touch(testFile);
+      assertTrue(testFile.exists());
+
+      ListIterator<File> tmpDirIter = KeyValueContainerUtil
+          .ContainerDeleteDirectory.getDeleteLeftovers(volume);
+      List<File> tmpDirFileList = new LinkedList<>();
+      boolean testFileExistsUnderTmp = false;
+
+      while (tmpDirIter.hasNext()) {
+        tmpDirFileList.add(tmpDirIter.next());
+      }
+
+      if (tmpDirFileList.contains(testFile)) {
+        testFileExistsUnderTmp = true;
+      }
+      assertTrue(testFileExistsUnderTmp);
+    }
+    conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY,
+        Arrays.toString(tempHddsVolumes));

Review Comment:
   I'm not sure why this config key is being set right before shutdown. The test passes without it, and removing it would also let us drop the tempHddsVolumes array.

##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java:
##########
@@ -395,4 +416,208 @@ public static Path getMetadataDirectory(
     return Paths.get(metadataPath);
   }
+
+  /**
+   * Utilities for container_delete_service directory
+   * which is located under <volume>/hdds/<cluster-id>/tmp/.
+   * Containers will be moved under it before getting deleted
+   * to avoid, in case of failure, having artifact leftovers
+   * on the default container path on the disk.
+   *
+   * Delete operation for Schema < V3
+   * 1. Container directory renamed to tmp directory.
+   * 2. Container is removed from in memory container set.
+   * 3. Container is deleted from tmp directory.
+   *
+   * Delete operation for Schema V3
+   * 1. Container directory renamed to tmp directory.
+   * 2. Container is removed from in memory container set.
+   * 3. Container's entries are removed from RocksDB.
+   * 4. Container is deleted from tmp directory.
+   */
+  public static class ContainerDeleteDirectory {
+
+    /**
+     * Delete all files under
+     * <volume>/hdds/<cluster-id>/tmp/container_delete_service.
+     */
+    public static synchronized void cleanTmpDir(HddsVolume hddsVolume)
+        throws IOException {
+      if (hddsVolume.getStorageState() != StorageVolume.VolumeState.NORMAL) {
+        LOG.debug("Call to clean tmp dir container_delete_service directory " +
+            "for {} while VolumeState {}",
+            hddsVolume.getStorageDir(),
+            hddsVolume.getStorageState().toString());
+        return;
+      }
+
+      if (!hddsVolume.getClusterID().isEmpty()) {
+        // Initialize tmp and delete service directories
+        hddsVolume.checkTmpDirPaths(hddsVolume.getClusterID());
+      } else {
+        LOG.error("Volume hasn't been formatted " +
+            "properly and has no ClusterId. " +
+            "Unable to initialize tmp and delete service directories.");
+      }
+
+      ListIterator<File> leftoversListIt = getDeleteLeftovers(hddsVolume);
+
+      while (leftoversListIt.hasNext()) {
+        File file = leftoversListIt.next();
+
+        // Check the case where we have Schema V3 and
+        // removing the container's entries from RocksDB fails.
+        // --------------------------------------------
+        // On datanode restart, we populate the container set
+        // based on the available datanode volumes and
+        // populate the container metadata based on the values in RocksDB.
+        // The container is in the tmp directory,
+        // so it won't be loaded in the container set
+        // but there will be orphaned entries in the volume's RocksDB.
+        // --------------------------------------------
+        // For every .container file we find under /tmp,
+        // we use it to get the RocksDB entries and delete them.
+        // If the .container file doesn't exist then the contents of the
+        // directory are probably leftovers of a failed delete and
+        // the RocksDB entries must have already been removed.
+        // In any case we can proceed with deleting the directory's contents.
+        if (VersionedDatanodeFeatures.isFinalized(

Review Comment:
   What we want to check here is whether the container is using schema V3, not whether schema V3 has been finalized. So we need to load the container file first and check its schema version. Only if that schema version is V3 do we need to call removeContainerFromDB.
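   To make the suggested check concrete, here is a minimal sketch of the per-container gate for the loop body, reusing only helpers that already appear in this diff (getContainerFile, ContainerDataYaml, DatanodeStoreSchemaThreeImpl, removeKVContainerData); the getSchemaVersion() accessor on KeyValueContainerData is my assumption:

```java
// Sketch: gate RocksDB cleanup on the container's own schema version,
// not on whether the DATANODE_SCHEMA_V3 layout feature is finalized.
File containerFile = getContainerFile(file);
if (containerFile != null) {
  KeyValueContainerData containerData = (KeyValueContainerData)
      ContainerDataYaml.readContainerFile(containerFile);
  // Only schema V3 containers keep their metadata in the volume-wide RocksDB.
  if (OzoneConsts.SCHEMA_V3.equals(containerData.getSchemaVersion())) {
    String dbPath = hddsVolume.getDbParentDir().getAbsolutePath();
    DatanodeStoreSchemaThreeImpl store = new DatanodeStoreSchemaThreeImpl(
        hddsVolume.getConf(), dbPath, false);
    store.removeKVContainerData(containerData.getContainerID());
  }
}
```

   This way a pre-finalization datanode that already has V3 containers still cleans its RocksDB entries, and a finalized datanode with older-schema leftovers skips the DB work.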
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java:
##########
@@ -88,6 +130,67 @@ public void testStartup() throws IOException {
     service.close();
   }

+  @ParameterizedTest
+  @ValueSource(strings = {OzoneConsts.SCHEMA_V1,
+      OzoneConsts.SCHEMA_V2, OzoneConsts.SCHEMA_V3})
+  public void testTmpDirOnStartup(String schemaVersion) throws IOException {

Review Comment:
   It's called testTmpDirOnStartup, but it actually tests the tmp dir on shutdown. Do we want to test startup cleaning here as well?

##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerPersistence.java:
##########
@@ -286,19 +288,161 @@ public void testDeleteContainer() throws Exception {
     BlockData someKey1 = new BlockData(blockID1);
     someKey1.setChunks(new LinkedList<ContainerProtos.ChunkInfo>());
     blockManager.putBlock(container1, someKey1);
+  }
+
+  @Test
+  public void testDeleteNonEmptyContainer() throws Exception {
+    long testContainerID2 = getTestContainerID();
+    Container container2 = addContainer(containerSet, testContainerID2);
+    container2.close();
+
+    Assert.assertTrue(containerSet.getContainerMapCopy()
+        .containsKey(testContainerID2));
+
+    // With schema v3, we don't have a container dedicated db,
+    // so skip check the behaviors related to it.
+    assumeFalse(schemaVersion.contains(OzoneConsts.SCHEMA_V3));

     // Deleting a non-empty container should fail.
     BlockID blockID2 = ContainerTestHelper.getTestBlockID(testContainerID2);
     BlockData someKey2 = new BlockData(blockID2);
     someKey2.setChunks(new LinkedList<ContainerProtos.ChunkInfo>());
     blockManager.putBlock(container2, someKey2);

+    // KeyValueHandler setup
+    String datanodeId = UUID.randomUUID().toString();
+
+    int[] interval = new int[1];
+    interval[0] = 2;
+    ContainerMetrics metrics = new ContainerMetrics(interval);
+
+    AtomicInteger icrReceived = new AtomicInteger(0);

Review Comment:
   I don't see the value being read anywhere, so I was just wondering if it was being used in the lambda somehow and I missed it.
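   If the counter is meant to be exercised, the test could read it back after the delete. A rough sketch, assuming the KeyValueHandler constructor accepts an ICR-sender callback and a deleteContainer(Container, boolean) entry point the way similar handler tests wire them up:

```java
// Sketch: count ICRs in the callback and assert afterwards, so that
// icrReceived is actually read. The constructor shape and the
// deleteContainer(Container, boolean) signature are assumptions here.
KeyValueHandler kvHandler = new KeyValueHandler(conf, datanodeId,
    containerSet, volumeSet, metrics,
    c -> icrReceived.incrementAndGet());
kvHandler.deleteContainer(container2, true);
Assert.assertEquals(1, icrReceived.get());
```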
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java:
##########
@@ -395,4 +416,174 @@ public static Path getMetadataDirectory(
     return Paths.get(metadataPath);
   }
+
+  /**
+   * Utilities for container_delete_service directory
+   * which is located under <volume>/hdds/<cluster-id>/tmp/.
+   * Containers will be moved under it before getting deleted
+   * to avoid, in case of failure, having artifact leftovers
+   * on the default container path on the disk.
+   */
+  public static class ContainerDeleteDirectory {
+
+    /**
+     * Delete all files under
+     * <volume>/hdds/<cluster-id>/tmp/container_delete_service.
+     */
+    public static synchronized void cleanTmpDir(HddsVolume hddsVolume)
+        throws IOException {
+      if (hddsVolume.getStorageState() != StorageVolume.VolumeState.NORMAL) {
+        LOG.debug("Call to clean tmp dir container_delete_service directory " +
+            "for {} while VolumeState {}",
+            hddsVolume.getStorageDir(),
+            hddsVolume.getStorageState().toString());
+        return;
+      }
+
+      if (!hddsVolume.getClusterID().isEmpty()) {
+        // Initialize delete directory
+        hddsVolume.createDeleteServiceDir();
+      } else {
+        throw new IOException("Volume has no ClusterId");
+      }
+
+      ListIterator<File> leftoversListIt = getDeleteLeftovers(hddsVolume);
+
+      while (leftoversListIt.hasNext()) {
+        File file = leftoversListIt.next();
+
+        // If SchemaV3 is enabled and we have a RocksDB
+        if (VersionedDatanodeFeatures.isFinalized(
+            HDDSLayoutFeature.DATANODE_SCHEMA_V3)) {
+          // Get container file
+          File containerFile = getContainerFile(file);
+
+          // If file exists
+          if (containerFile != null) {
+            ContainerData containerData = ContainerDataYaml
+                .readContainerFile(containerFile);
+            KeyValueContainerData keyValueContainerData =
+                (KeyValueContainerData) containerData;
+
+            // Remove container from Rocks DB
+            String dbPath = hddsVolume.getDbParentDir().getAbsolutePath();
+            DatanodeStoreSchemaThreeImpl store =
+                new DatanodeStoreSchemaThreeImpl(hddsVolume.getConf(),
+                    dbPath, false);
+            store.removeKVContainerData(keyValueContainerData.getContainerID());
+          }
+        }
+
+        try {
+          if (file.isDirectory()) {
+            FileUtils.deleteDirectory(file);
+          } else {
+            FileUtils.delete(file);
+          }
+        } catch (IOException ex) {
+          LOG.error("Failed to delete directory or file inside " +
+              "{}", hddsVolume.getDeleteServiceDirPath().toString(), ex);
+        }
+      }
+    }
+
+    /**
+     * Search recursively for the container file under a
+     * directory. Return null if the file is not found.
+     * @param file
+     * @return container file or null if it doesn't exist
+     * @throws IOException
+     */
+    public static File getContainerFile(File file) throws IOException {

Review Comment:
   What would be an example of this failure case? `ContainerUtils#getContainerFile` just requires that the container file is in a certain subdirectory of the base container directory. The delete does not move this file relative to the base location.

##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java:
##########
@@ -416,6 +418,8 @@ ContainerCommandResponseProto handleDeleteContainer(
       deleteInternal(kvContainer, forceDelete);
     } catch (StorageContainerException ex) {
       return ContainerUtils.logAndReturnError(LOG, ex, request);
+    } catch (IOException ex) {

Review Comment:
   We still need to throw a StorageContainerException if the move fails so that SCM will retry the delete. My point about the scanners was that it is OK if SCM keeps retrying even though there is some IO issue with the volume, because the scanners will eventually pick up on it and fail the volume, after which the deletions will not be sent again.
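   For illustration, a minimal sketch of a catch block along those lines, reusing the logAndReturnError path already shown in this hunk; the choice of CONTAINER_INTERNAL_ERROR as the result code is my assumption, not something settled in this review:

```java
// Sketch: surface the IO failure as a StorageContainerException so the
// response carries an error result and SCM retries the delete.
// CONTAINER_INTERNAL_ERROR as the result code is an assumption.
} catch (IOException ex) {
  return ContainerUtils.logAndReturnError(LOG,
      new StorageContainerException("Container delete failed: "
          + ex.getMessage(), ContainerProtos.Result.CONTAINER_INTERNAL_ERROR),
      request);
}
```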
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsDatanodeService.java:
##########
@@ -33,24 +48,52 @@
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;

 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 /**
  * Test class for {@link HddsDatanodeService}.
  */
+
 public class TestHddsDatanodeService {
+
+  @TempDir
+  private File tempDir;
   private File testDir;
-  private OzoneConfiguration conf;
-  private HddsDatanodeService service;
-  private String[] args = new String[] {};
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestHddsDatanodeService.class);
+
+  private final String clusterId = UUID.randomUUID().toString();
+  private final OzoneConfiguration conf = new OzoneConfiguration();
+  private final String[] args = new String[] {};
+  private final HddsDatanodeService service =
+      HddsDatanodeService.createHddsDatanodeService(args);

Review Comment:
   ```suggestion
     private final HddsDatanodeService service =
         HddsDatanodeService.createHddsDatanodeService(new String[]{});
   ```

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org