Fixed the container status for nested containers. Prior to this patch, the ContainerStatus inside the TaskStatus for a task in the default executor is not correct. The agent always gets the container status of the top level container, even if the task might correspond to a nested container.
This patch fixed this issue by letting the executor set the container ID for the task (because only it has the knowledge about this mapping), and the agent will use that container ID to get the proper container status. Review: https://reviews.apache.org/r/53465 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/382b2ea9 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/382b2ea9 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/382b2ea9 Branch: refs/heads/master Commit: 382b2ea9b2de9d52b4a78dd3ea8afc3946c60826 Parents: 2c0ddba Author: Jie Yu <yujie....@gmail.com> Authored: Thu Nov 3 18:44:58 2016 -0700 Committer: Jie Yu <yujie....@gmail.com> Committed: Mon Nov 7 16:37:25 2016 -0800 ---------------------------------------------------------------------- src/launcher/default_executor.cpp | 11 +++++++++++ .../containerizer/mesos/isolators/cgroups/cgroups.cpp | 5 ++++- .../containerizer/mesos/isolators/network/cni/cni.cpp | 5 ++++- src/slave/slave.cpp | 14 +++++++++++++- 4 files changed, 32 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/launcher/default_executor.cpp ---------------------------------------------------------------------- diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp index b8a88ad..f4e1ea4 100644 --- a/src/launcher/default_executor.cpp +++ b/src/launcher/default_executor.cpp @@ -869,6 +869,17 @@ private: status.set_healthy(healthy.get()); } + // Fill the container ID associated with this task. + // TODO(jieyu): Consider maintain a hashmap between TaskID to + // ContainerID so that we don't have to loop through all tasks. + foreach (const ContainerID& containerId, containers.keys()) { + if (containers[containerId] == taskId) { + ContainerStatus* containerStatus = status.mutable_container_status(); + containerStatus->mutable_container_id()->CopyFrom(containerId); + break; + } + } + Call call; call.set_type(Call::UPDATE); http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp index 2f6723c..3955cc4 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp @@ -743,8 +743,11 @@ Future<ResourceStatistics> CgroupsIsolatorProcess::usage( Future<ContainerStatus> CgroupsIsolatorProcess::status( const ContainerID& containerId) { + // TODO(jieyu): Currently, all nested containers share the same + // cgroup as their parent container. Revisit this once this is no + // long true. if (containerId.has_parent()) { - return Failure("Not supported for nested containers"); + return status(containerId.parent()); } if (!infos.contains(containerId)) { http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp b/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp index 939142e..50b4377 100644 --- a/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp +++ b/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp @@ -1272,8 +1272,11 @@ Future<ContainerStatus> NetworkCniIsolatorProcess::status( // Hence, in order to obtain the IP address of this nested container // one should always look up the IP address of the root container of // the hierarchy to which this container belongs. + // + // TODO(jieyu): Revisit this once we allow nested containers to use + // different network namespaces than their parent container. if (containerId.has_parent()) { - return Failure("Not supported for nested containers"); + return status(containerId.parent()); } // TODO(jieyu): We don't create 'Info' struct for containers that want http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index d6c3373..2d91e1c 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -3896,7 +3896,19 @@ void Slave::statusUpdate(StatusUpdate update, const Option<UPID>& pid) metrics.valid_status_updates++; // Before sending update, we need to retrieve the container status. - containerizer->status(executor->containerId) + // + // NOTE: If the executor sets the ContainerID inside the + // ContainerStatus, that indicates that the Task this status update + // is associated with is tied to that container (could be nested). + // Therefore, we need to get the status of that container, instead + // of the top level executor container. + ContainerID containerId = executor->containerId; + if (update.status().has_container_status() && + update.status().container_status().has_container_id()) { + containerId = update.status().container_status().container_id(); + } + + containerizer->status(containerId) .onAny(defer(self(), &Slave::_statusUpdate, update,