Repository: mesos Updated Branches: refs/heads/master 175416694 -> f4345c1f1
Added soft limit and kill to `disk/xfs`. Added a new flag `--xfs_kill_containers` to the disk/xfs isolator. This will create a 10 MB buffer between the soft and hard limits for a container. When the soft limit is exceeded the container will subsequently be killed. Review: https://reviews.apache.org/r/66001/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9277c50d Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9277c50d Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9277c50d Branch: refs/heads/master Commit: 9277c50d93e616c038d6a7bf6c153cd8ac04d629 Parents: 1754166 Author: Harold Dost <h.d...@criteo.com> Authored: Wed Apr 25 08:46:30 2018 -0700 Committer: James Peach <jpe...@apache.org> Committed: Mon Apr 30 13:46:38 2018 -0700 ---------------------------------------------------------------------- .../containerizer/mesos/isolators/xfs/disk.cpp | 133 +++++++++++++++---- .../containerizer/mesos/isolators/xfs/disk.hpp | 14 +- .../containerizer/mesos/isolators/xfs/utils.cpp | 48 +++++-- .../containerizer/mesos/isolators/xfs/utils.hpp | 20 ++- src/slave/flags.cpp | 8 +- src/slave/flags.hpp | 1 + src/tests/containerizer/xfs_quota_tests.cpp | 4 +- 7 files changed, 180 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/slave/containerizer/mesos/isolators/xfs/disk.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp index 8d9f8f8..db11f9a 100644 --- a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp +++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp @@ -18,7 +18,10 @@ #include <glog/logging.h> +#include <process/after.hpp> +#include <process/dispatch.hpp> #include <process/id.hpp> +#include <process/loop.hpp> #include 
<stout/check.hpp> #include <stout/foreach.hpp> @@ -26,6 +29,8 @@ #include <stout/os/stat.hpp> +#include "common/protobuf_utils.hpp" + #include "slave/paths.hpp" using std::list; @@ -36,7 +41,6 @@ using process::Future; using process::Owned; using process::PID; using process::Process; -using process::Promise; using mesos::slave::ContainerConfig; using mesos::slave::ContainerLaunchInfo; @@ -156,21 +160,28 @@ Try<Isolator*> XfsDiskIsolatorProcess::create(const Flags& flags) return Error(status->message); } - xfs::QuotaPolicy quotaPolicy = - flags.enforce_container_disk_quota ? xfs::QuotaPolicy::ENFORCING - : xfs::QuotaPolicy::ACCOUNTING; + xfs::QuotaPolicy quotaPolicy = xfs::QuotaPolicy::ACCOUNTING; + + if (flags.enforce_container_disk_quota) { + quotaPolicy = flags.xfs_kill_containers + ? xfs::QuotaPolicy::ENFORCING_ACTIVE + : xfs::QuotaPolicy::ENFORCING_PASSIVE; + } return new MesosIsolator(Owned<MesosIsolatorProcess>( new XfsDiskIsolatorProcess( + flags.container_disk_watch_interval, quotaPolicy, flags.work_dir, totalProjectIds.get()))); } XfsDiskIsolatorProcess::XfsDiskIsolatorProcess( + Duration _watchInterval, xfs::QuotaPolicy _quotaPolicy, const std::string& _workDir, const IntervalSet<prid_t>& projectIds) : ProcessBase(process::ID::generate("xfs-disk-isolator")), + watchInterval(_watchInterval), quotaPolicy(_quotaPolicy), workDir(_workDir), totalProjectIds(projectIds), @@ -298,15 +309,20 @@ Future<Option<ContainerLaunchInfo>> XfsDiskIsolatorProcess::prepare( } -Future<Nothing> XfsDiskIsolatorProcess::isolate( - const ContainerID& containerId, - pid_t pid) +Future<ContainerLimitation> XfsDiskIsolatorProcess::watch( + const ContainerID& containerId) { - if (!infos.contains(containerId)) { - return Failure("Unknown container"); + if (infos.contains(containerId)) { + return infos[containerId]->limitation.future(); } - return Nothing(); + // Any container that did not have a project ID assigned when + // we recovered it won't be tracked. 
This will happen when the + isolator is first enabled, since we didn't get a chance to + assign project IDs to existing containers. We don't want to + cause those containers to fail, so we just ignore them. + LOG(WARNING) << "Ignoring watch for unknown container " << containerId; + return Future<ContainerLimitation>(); } @@ -320,8 +336,8 @@ Future<Nothing> XfsDiskIsolatorProcess::update( } const Owned<Info>& info = infos[containerId]; - Option<Bytes> needed = getDiskResource(resources); + if (needed.isNone()) { // TODO(jpeach) If there's no disk resource attached, we should set the // minimum quota (1 block), since a zero quota would be unconstrained. @@ -342,9 +358,20 @@ Future<Nothing> XfsDiskIsolatorProcess::update( break; } - case xfs::QuotaPolicy::ENFORCING: { + case xfs::QuotaPolicy::ENFORCING_ACTIVE: + case xfs::QuotaPolicy::ENFORCING_PASSIVE: { + Bytes hardLimit = needed.get(); + + // The purpose behind adding to the hard limit is so that the soft + // limit can be exceeded thereby allowing us to check if the limit + // has been reached without allowing the process to allocate too + // much beyond the desired limit. 
+ if (quotaPolicy == xfs::QuotaPolicy::ENFORCING_ACTIVE) { + hardLimit += Megabytes(10); + } + Try<Nothing> status = xfs::setProjectQuota( - info->directory, info->projectId, needed.get()); + info->directory, info->projectId, needed.get(), hardLimit); if (status.isError()) { return Failure("Failed to update quota for project " + @@ -353,7 +380,7 @@ Future<Nothing> XfsDiskIsolatorProcess::update( LOG(INFO) << "Set quota on container " << containerId << " for project " << info->projectId - << " to " << needed.get(); + << " to " << needed.get() << "/" << hardLimit; break; } @@ -364,6 +391,41 @@ Future<Nothing> XfsDiskIsolatorProcess::update( } +void XfsDiskIsolatorProcess::check() +{ + CHECK(quotaPolicy == xfs::QuotaPolicy::ENFORCING_ACTIVE); + + foreachpair(const ContainerID& containerId, const Owned<Info>& info, infos) { + Result<xfs::QuotaInfo> quotaInfo = xfs::getProjectQuota( + info->directory, info->projectId); + + if (quotaInfo.isError()) { + LOG(WARNING) << "Failed to check disk usage for container '" + << containerId << "': " << quotaInfo.error(); + + continue; + } + + // If the soft limit is exceeded the container should be killed. + if (quotaInfo->used > quotaInfo->softLimit) { + Resource resource; + resource.set_name("disk"); + resource.set_type(Value::SCALAR); + resource.mutable_scalar()->set_value( + quotaInfo->used.bytes() / Bytes::MEGABYTES); + + info->limitation.set( + protobuf::slave::createContainerLimitation( + Resources(resource), + "Disk usage (" + stringify(quotaInfo->used) + + ") exceeds quota (" + + stringify(quotaInfo->softLimit) + ")", + TaskStatus::REASON_CONTAINER_LIMITATION_DISK)); + } + } +} + + Future<ResourceStatistics> XfsDiskIsolatorProcess::usage( const ContainerID& containerId) { @@ -408,26 +470,27 @@ Future<Nothing> XfsDiskIsolatorProcess::cleanup(const ContainerID& containerId) // Take a copy of the Info we are removing so that we can use it // to construct the Failure message if necessary. 
- const Info info = *infos[containerId]; + const std::string directory = infos[containerId]->directory; + const prid_t projectId = infos[containerId]->projectId; infos.erase(containerId); - LOG(INFO) << "Removing project ID " << info.projectId - << " from '" << info.directory << "'"; + LOG(INFO) << "Removing project ID " << projectId + << " from '" << directory << "'"; Try<Nothing> quotaStatus = xfs::clearProjectQuota( - info.directory, info.projectId); + directory, projectId); if (quotaStatus.isError()) { LOG(ERROR) << "Failed to clear quota for '" - << info.directory << "': " << quotaStatus.error(); + << directory << "': " << quotaStatus.error(); } - Try<Nothing> projectStatus = xfs::clearProjectId(info.directory); + Try<Nothing> projectStatus = xfs::clearProjectId(directory); if (projectStatus.isError()) { LOG(ERROR) << "Failed to remove project ID " - << info.projectId - << " from '" << info.directory << "': " + << projectId + << " from '" << directory << "': " << projectStatus.error(); } @@ -436,10 +499,10 @@ Future<Nothing> XfsDiskIsolatorProcess::cleanup(const ContainerID& containerId) // would be a project ID leak, but we could recover it at GC time if // that was visible to isolators. if (quotaStatus.isError() || projectStatus.isError()) { - freeProjectIds -= info.projectId; - return Failure("Failed to cleanup '" + info.directory + "'"); + freeProjectIds -= projectId; + return Failure("Failed to cleanup '" + directory + "'"); } else { - returnProjectId(info.projectId); + returnProjectId(projectId); return Nothing(); } } @@ -469,6 +532,26 @@ void XfsDiskIsolatorProcess::returnProjectId( } } + +void XfsDiskIsolatorProcess::initialize() +{ + process::PID<XfsDiskIsolatorProcess> self(this); + + if (quotaPolicy == xfs::QuotaPolicy::ENFORCING_ACTIVE) { + // Start a loop to periodically check for containers + // breaking the soft limit. 
+ process::loop( + self, + [=]() { + return process::after(watchInterval); + }, + [=](const Nothing&) -> process::ControlFlow<Nothing> { + check(); + return process::Continue(); + }); + } +} + } // namespace slave { } // namespace internal { } // namespace mesos { http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/slave/containerizer/mesos/isolators/xfs/disk.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp index 07e68a7..9a5ca8b 100644 --- a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp +++ b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp @@ -55,9 +55,8 @@ public: const ContainerID& containerId, const mesos::slave::ContainerConfig& containerConfig); - virtual process::Future<Nothing> isolate( - const ContainerID& containerId, - pid_t pid); + virtual process::Future<mesos::slave::ContainerLimitation> watch( + const ContainerID& containerId); virtual process::Future<Nothing> update( const ContainerID& containerId, @@ -69,12 +68,19 @@ public: virtual process::Future<Nothing> cleanup( const ContainerID& containerId); +protected: + virtual void initialize(); + private: XfsDiskIsolatorProcess( + Duration watchInterval, xfs::QuotaPolicy quotaPolicy, const std::string& workDir, const IntervalSet<prid_t>& projectIds); + // Responsible for validating a container hasn't broken the soft limit. + void check(); + // Take the next project ID from the unallocated pool. 
Option<prid_t> nextProjectId(); @@ -89,8 +95,10 @@ private: const std::string directory; Bytes quota; const prid_t projectId; + process::Promise<mesos::slave::ContainerLimitation> limitation; }; + const Duration watchInterval; xfs::QuotaPolicy quotaPolicy; const std::string workDir; const IntervalSet<prid_t> totalProjectIds; http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/slave/containerizer/mesos/isolators/xfs/utils.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/xfs/utils.cpp b/src/slave/containerizer/mesos/isolators/xfs/utils.cpp index 2708524..cc14254 100644 --- a/src/slave/containerizer/mesos/isolators/xfs/utils.cpp +++ b/src/slave/containerizer/mesos/isolators/xfs/utils.cpp @@ -134,7 +134,8 @@ namespace internal { static Try<Nothing> setProjectQuota( const string& path, prid_t projectId, - Bytes limit) + Bytes softLimit, + Bytes hardLimit) { Try<string> devname = getDeviceForPath(path); if (devname.isError()) { @@ -148,14 +149,10 @@ static Try<Nothing> setProjectQuota( // Specify that we are setting a project quota for this ID. quota.d_id = projectId; quota.d_flags = FS_PROJ_QUOTA; - - // Set both the hard and the soft limit to the same quota. Functionally - // all we need is the hard limit. The soft limit has no effect when it - // is the same as the hard limit but we set it for explicitness. 
quota.d_fieldmask = FS_DQ_BSOFT | FS_DQ_BHARD; - quota.d_blk_hardlimit = BasicBlocks(limit).blocks(); - quota.d_blk_softlimit = BasicBlocks(limit).blocks(); + quota.d_blk_hardlimit = BasicBlocks(hardLimit).blocks(); + quota.d_blk_softlimit = BasicBlocks(softLimit).blocks(); if (::quotactl(QCMD(Q_XSETQLIM, PRJQUOTA), devname.get().c_str(), @@ -246,7 +243,8 @@ Result<QuotaInfo> getProjectQuota( } QuotaInfo info; - info.limit = BasicBlocks(quota.d_blk_hardlimit).bytes(); + info.softLimit = BasicBlocks(quota.d_blk_softlimit).bytes(); + info.hardLimit = BasicBlocks(quota.d_blk_hardlimit).bytes(); info.used = BasicBlocks(quota.d_bcount).bytes(); return info; @@ -256,7 +254,31 @@ Result<QuotaInfo> getProjectQuota( Try<Nothing> setProjectQuota( const string& path, prid_t projectId, - Bytes limit) + Bytes softLimit, + Bytes hardLimit) +{ + if (projectId == NON_PROJECT_ID) { + return nonProjectError(); + } + + // A 0 limit deletes the quota record. If that's desired, the + // caller should use clearProjectQuota(). + if (hardLimit == 0) { + return Error("Quota hard limit must be greater than 0"); + } + + if (softLimit == 0) { + return Error("Quota soft limit must be greater than 0"); + } + + return internal::setProjectQuota(path, projectId, softLimit, hardLimit); +} + + +Try<Nothing> setProjectQuota( + const string& path, + prid_t projectId, + Bytes hardLimit) { if (projectId == NON_PROJECT_ID) { return nonProjectError(); @@ -264,11 +286,11 @@ Try<Nothing> setProjectQuota( // A 0 limit deletes the quota record. If that's desired, the // caller should use clearProjectQuota(). 
- if (limit == 0) { - return Error( "Quota limit must be greater than 0"); + if (hardLimit == 0) { + return Error("Quota limit must be greater than 0"); } - return internal::setProjectQuota(path, projectId, limit); + return internal::setProjectQuota(path, projectId, hardLimit, hardLimit); } @@ -280,7 +302,7 @@ Try<Nothing> clearProjectQuota( return nonProjectError(); } - return internal::setProjectQuota(path, projectId, Bytes(0)); + return internal::setProjectQuota(path, projectId, Bytes(0), Bytes(0)); } http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/slave/containerizer/mesos/isolators/xfs/utils.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/xfs/utils.hpp b/src/slave/containerizer/mesos/isolators/xfs/utils.hpp index e034133..e269eb5 100644 --- a/src/slave/containerizer/mesos/isolators/xfs/utils.hpp +++ b/src/slave/containerizer/mesos/isolators/xfs/utils.hpp @@ -32,7 +32,8 @@ namespace xfs { struct QuotaInfo { - Bytes limit; + Bytes softLimit; + Bytes hardLimit; Bytes used; }; @@ -70,14 +71,18 @@ private: enum class QuotaPolicy { - ENFORCING, ACCOUNTING, + ENFORCING_ACTIVE, + ENFORCING_PASSIVE }; inline bool operator==(const QuotaInfo& left, const QuotaInfo& right) { - return left.limit == right.limit && left.used == right.used; + return + left.hardLimit == right.hardLimit && + left.softLimit == right.softLimit && + left.used == right.used; } @@ -101,7 +106,14 @@ Result<QuotaInfo> getProjectQuota( Try<Nothing> setProjectQuota( const std::string& path, prid_t projectId, - Bytes limit); + Bytes softLimit, + Bytes hardLimit); + + +Try<Nothing> setProjectQuota( + const std::string& path, + prid_t projectId, + Bytes hardLimit); Try<Nothing> clearProjectQuota( http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/slave/flags.cpp ---------------------------------------------------------------------- diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp index 
0ee4f65..02e1a8b 100644 --- a/src/slave/flags.cpp +++ b/src/slave/flags.cpp @@ -1131,7 +1131,7 @@ mesos::internal::slave::Flags::Flags() add(&Flags::container_disk_watch_interval, "container_disk_watch_interval", "The interval between disk quota checks for containers. This flag is\n" - "used by the `disk/du` isolator.", + "used by the `disk/du` and `disk/xfs` isolators.", Seconds(15)); // TODO(jieyu): Consider enabling this flag by default. Remember @@ -1310,6 +1310,12 @@ mesos::internal::slave::Flags::Flags() "xfs_project_range", "The ranges of XFS project IDs to use for tracking directory quotas", "[5000-10000]"); + + add(&Flags::xfs_kill_containers, + "xfs_kill_containers", + "Whether the `disk/xfs` isolator should detect and terminate\n" + "containers that exceed their allocated disk quota.", + false); #endif add(&Flags::http_command_executor, http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/slave/flags.hpp ---------------------------------------------------------------------- diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp index 4195d3b..a839591 100644 --- a/src/slave/flags.hpp +++ b/src/slave/flags.hpp @@ -178,6 +178,7 @@ public: Option<std::string> master_detector; #if ENABLE_XFS_DISK_ISOLATOR std::string xfs_project_range; + bool xfs_kill_containers; #endif bool http_command_executor; Option<SlaveCapabilities> agent_features; http://git-wip-us.apache.org/repos/asf/mesos/blob/9277c50d/src/tests/containerizer/xfs_quota_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp index 64c3e1c..75500ed 100644 --- a/src/tests/containerizer/xfs_quota_tests.cpp +++ b/src/tests/containerizer/xfs_quota_tests.cpp @@ -77,7 +77,7 @@ static QuotaInfo makeQuotaInfo( Bytes limit, Bytes used) { - return {limit, used}; + return {limit, limit, used}; } @@ -267,7 +267,7 @@ TEST_F(ROOT_XFS_QuotaTest, QuotaGetSet) Result<QuotaInfo> 
info = getProjectQuota(root, projectId); ASSERT_SOME(info); - EXPECT_EQ(limit, info->limit); + EXPECT_EQ(limit, info->hardLimit); EXPECT_EQ(Bytes(0), info->used); EXPECT_SOME(clearProjectQuota(root, projectId));