Fixed the container status for nested containers.

Prior to this patch, the ContainerStatus inside the TaskStatus for a
task in the default executor is not correct. The agent always gets the
container status of the top level container, even if the task might
correspond to a nested container.

This patch fixed this issue by letting the executor set the container
ID for the task (because only it has the knowledge about this
mapping), and the agent will use that container ID to get the proper
container status.

Review: https://reviews.apache.org/r/53465


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/382b2ea9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/382b2ea9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/382b2ea9

Branch: refs/heads/master
Commit: 382b2ea9b2de9d52b4a78dd3ea8afc3946c60826
Parents: 2c0ddba
Author: Jie Yu <yujie....@gmail.com>
Authored: Thu Nov 3 18:44:58 2016 -0700
Committer: Jie Yu <yujie....@gmail.com>
Committed: Mon Nov 7 16:37:25 2016 -0800

----------------------------------------------------------------------
 src/launcher/default_executor.cpp                     | 11 +++++++++++
 .../containerizer/mesos/isolators/cgroups/cgroups.cpp |  5 ++++-
 .../containerizer/mesos/isolators/network/cni/cni.cpp |  5 ++++-
 src/slave/slave.cpp                                   | 14 +++++++++++++-
 4 files changed, 32 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp 
b/src/launcher/default_executor.cpp
index b8a88ad..f4e1ea4 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -869,6 +869,17 @@ private:
       status.set_healthy(healthy.get());
     }
 
+    // Fill the container ID associated with this task.
+    // TODO(jieyu): Consider maintain a hashmap between TaskID to
+    // ContainerID so that we don't have to loop through all tasks.
+    foreach (const ContainerID& containerId, containers.keys()) {
+      if (containers[containerId] == taskId) {
+        ContainerStatus* containerStatus = status.mutable_container_status();
+        containerStatus->mutable_container_id()->CopyFrom(containerId);
+        break;
+      }
+    }
+
     Call call;
     call.set_type(Call::UPDATE);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp 
b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
index 2f6723c..3955cc4 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
@@ -743,8 +743,11 @@ Future<ResourceStatistics> CgroupsIsolatorProcess::usage(
 Future<ContainerStatus> CgroupsIsolatorProcess::status(
     const ContainerID& containerId)
 {
+  // TODO(jieyu): Currently, all nested containers share the same
+  // cgroup as their parent container. Revisit this once this is no
+  // long true.
   if (containerId.has_parent()) {
-    return Failure("Not supported for nested containers");
+    return status(containerId.parent());
   }
 
   if (!infos.contains(containerId)) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp 
b/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
index 939142e..50b4377 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
@@ -1272,8 +1272,11 @@ Future<ContainerStatus> 
NetworkCniIsolatorProcess::status(
   // Hence, in order to obtain the IP address of this nested container
   // one should always look up the IP address of the root container of
   // the hierarchy to which this container belongs.
+  //
+  // TODO(jieyu): Revisit this once we allow nested containers to use
+  // different network namespaces than their parent container.
   if (containerId.has_parent()) {
-    return Failure("Not supported for nested containers");
+    return status(containerId.parent());
   }
 
   // TODO(jieyu): We don't create 'Info' struct for containers that want

http://git-wip-us.apache.org/repos/asf/mesos/blob/382b2ea9/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index d6c3373..2d91e1c 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -3896,7 +3896,19 @@ void Slave::statusUpdate(StatusUpdate update, const 
Option<UPID>& pid)
   metrics.valid_status_updates++;
 
   // Before sending update, we need to retrieve the container status.
-  containerizer->status(executor->containerId)
+  //
+  // NOTE: If the executor sets the ContainerID inside the
+  // ContainerStatus, that indicates that the Task this status update
+  // is associated with is tied to that container (could be nested).
+  // Therefore, we need to get the status of that container, instead
+  // of the top level executor container.
+  ContainerID containerId = executor->containerId;
+  if (update.status().has_container_status() &&
+      update.status().container_status().has_container_id()) {
+    containerId = update.status().container_status().container_id();
+  }
+
+  containerizer->status(containerId)
     .onAny(defer(self(),
                  &Slave::_statusUpdate,
                  update,

Reply via email to