This is an automated email from the ASF dual-hosted git repository. chhsiao pushed a commit to branch 1.7.x in repository https://gitbox.apache.org/repos/asf/mesos.git
commit d336d4add48a2776b9336dfea0e2ca915cc88e09 Author: Chun-Hung Hsiao <chhs...@mesosphere.io> AuthorDate: Tue Feb 12 20:49:42 2019 -0800 Made SLRP clean up mount directories for destroyed MOUNT disks. When the SLRP stage/publish a CSI volume, it creates the staging/target paths, but only the target path is cleaned up during unpublish. Moreover, even if the CSI plugin does not support node-staging, the current cleanup is still not enough, as the parent directory of the target path is not removed. This patch fixes this problem. Review: https://reviews.apache.org/r/69970 --- src/csi/paths.cpp | 43 +++++++++++++- src/csi/paths.hpp | 14 +++++ src/resource_provider/storage/provider.cpp | 67 +++++++++++++++++++--- src/resource_provider/storage/provider_process.hpp | 1 + 4 files changed, 115 insertions(+), 10 deletions(-) diff --git a/src/csi/paths.cpp b/src/csi/paths.cpp index 772d960..a1bd021 100644 --- a/src/csi/paths.cpp +++ b/src/csi/paths.cpp @@ -282,11 +282,50 @@ string getMountRootDir( } +Try<list<string>> getMountPaths(const string& mountRootDir) +{ + return fs::list(path::join(mountRootDir, "*")); +} + + +string getMountPath(const string& mountRootDir, const string& volumeId) +{ + // Volume ID is percent-encoded to avoid invalid characters in the path. + return path::join(mountRootDir, http::encode(volumeId)); +} + + +Try<string> parseMountPath(const string& mountRootDir, const string& dir) +{ + // TODO(chhsiao): Consider using `<regex>`, which requires GCC 4.9+. + + // Make sure there's a separator at the end of the `mountRootDir` so that we + // don't accidentally slice off part of a directory. + const string prefix = path::join(mountRootDir, ""); + + if (!strings::startsWith(dir, prefix)) { + return Error( + "Directory '" + dir + "' does not fall under the mount root directory" + " '" + mountRootDir + "'"); + } + + // Volume ID is percent-encoded to avoid invalid characters in the path. + Try<string> volumeId = http::decode(Path(dir).basename()); + if (volumeId.isError()) { + return Error( + "Could not decode volume ID from string '" + Path(dir).basename() + + "': " + volumeId.error()); + } + + return volumeId.get(); +} + + string getMountStagingPath( const string& mountRootDir, const string& volumeId) { - return path::join(mountRootDir, http::encode(volumeId), STAGING_DIR); + return path::join(getMountPath(mountRootDir, volumeId), STAGING_DIR); } @@ -294,7 +333,7 @@ string getMountTargetPath( const string& mountRootDir, const string& volumeId) { - return path::join(mountRootDir, http::encode(volumeId), TARGET_DIR); + return path::join(getMountPath(mountRootDir, volumeId), TARGET_DIR); } } // namespace paths { diff --git a/src/csi/paths.hpp b/src/csi/paths.hpp index 7a4e9e0..8673b03 100644 --- a/src/csi/paths.hpp +++ b/src/csi/paths.hpp @@ -136,6 +136,20 @@ std::string getMountRootDir( const std::string& name); +Try<std::list<std::string>> getMountPaths( + const std::string& mountRootDir); + + +std::string getMountPath( + const std::string& mountRootDir, + const std::string& volumeId); + + +Try<std::string> parseMountPath( + const std::string& mountRootDir, + const std::string& dir); + + std::string getMountStagingPath( const std::string& mountRootDir, const std::string& volumeId); diff --git a/src/resource_provider/storage/provider.cpp b/src/resource_provider/storage/provider.cpp index 7b2f8b9..eefcb0f 100644 --- a/src/resource_provider/storage/provider.cpp +++ b/src/resource_provider/storage/provider.cpp @@ -961,6 +961,34 @@ Future<Nothing> StorageLocalResourceProviderProcess::recoverVolumes() } } + // Garbage collect leftover mount paths that were failed to remove before. + const string mountRootDir = csi::paths::getMountRootDir( + slave::paths::getCsiRootDir(workDir), + info.storage().plugin().type(), + info.storage().plugin().name()); + + Try<list<string>> mountPaths = csi::paths::getMountPaths(mountRootDir); + if (mountPaths.isError()) { + // TODO(chhsiao): This could indicate that something is seriously wrong. To + // help debugging the problem, we should surface the error via MESOS-8745. + return Failure( + "Failed to find mount paths for CSI plugin type '" + + info.storage().plugin().type() + "' and name '" + + info.storage().plugin().name() + "': " + mountPaths.error()); + } + + foreach (const string& path, mountPaths.get()) { + Try<string> volumeId = csi::paths::parseMountPath(mountRootDir, path); + if (volumeId.isError()) { + return Failure( + "Failed to parse mount path '" + path + "': " + volumeId.error()); + } + + if (!volumes.contains(volumeId.get())) { + garbageCollectMountPath(volumeId.get()); + } + } + return collect(futures).then([] { return Nothing(); }); } @@ -2349,6 +2377,7 @@ Future<Nothing> StorageLocalResourceProviderProcess::nodeStage( info.storage().plugin().name()), volumeId); + // NOTE: The staging path will be cleaned up in `deleteVolume`. Try<Nothing> mkdir = os::mkdir(stagingPath); if (mkdir.isError()) { return Failure( @@ -2455,6 +2484,7 @@ Future<Nothing> StorageLocalResourceProviderProcess::nodePublish( info.storage().plugin().name()), volumeId); + // NOTE: The target path will be cleaned up in `deleteVolume`. Try<Nothing> mkdir = os::mkdir(targetPath); if (mkdir.isError()) { return Failure( @@ -2552,13 +2582,6 @@ Future<Nothing> StorageLocalResourceProviderProcess::nodeUnpublish( volume.state.set_state(VolumeState::VOL_READY); checkpointVolumeState(volumeId); - Try<Nothing> rmdir = os::rmdir(targetPath); - if (rmdir.isError()) { - return Failure( - "Failed to remove mount point '" + targetPath + "': " + - rmdir.error()); - } - return Nothing(); })); } @@ -2706,7 +2729,13 @@ Future<bool> StorageLocalResourceProviderProcess::deleteVolume( return deleted .then(defer(self(), [this, volumeId, volumePath] { volumes.erase(volumeId); - CHECK_SOME(os::rmdir(volumePath)); + + Try<Nothing> rmdir = os::rmdir(volumePath); + CHECK_SOME(rmdir) + << "Failed to remove checkpointed volume state at '" << volumePath + << "': " << rmdir.error(); + + garbageCollectMountPath(volumeId); return controllerCapabilities.createDeleteVolume; })); @@ -3419,6 +3448,28 @@ void StorageLocalResourceProviderProcess::garbageCollectOperationPath( } +void StorageLocalResourceProviderProcess::garbageCollectMountPath( + const string& volumeId) +{ + CHECK(!volumes.contains(volumeId)); + + const string path = csi::paths::getMountPath( + csi::paths::getMountRootDir( + slave::paths::getCsiRootDir(workDir), + info.storage().plugin().type(), + info.storage().plugin().name()), + volumeId); + + if (os::exists(path)) { + Try<Nothing> rmdir = os::rmdir(path); + if (rmdir.isError()) { + LOG(ERROR) + << "Failed to remove directory '" << path << "': " << rmdir.error(); + } + } +} + + void StorageLocalResourceProviderProcess::checkpointResourceProviderState() { ResourceProviderState state; diff --git a/src/resource_provider/storage/provider_process.hpp b/src/resource_provider/storage/provider_process.hpp index 2ba24ae..a5536b3 100644 --- a/src/resource_provider/storage/provider_process.hpp +++ b/src/resource_provider/storage/provider_process.hpp @@ -327,6 +327,7 @@ private: const Try<std::vector<ResourceConversion>>& conversions); void garbageCollectOperationPath(const id::UUID& operationUuid); + void garbageCollectMountPath(const std::string& volumeId); void checkpointResourceProviderState(); void checkpointVolumeState(const std::string& volumeId);