This is an automated email from the ASF dual-hosted git repository.

gilbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 36a64c869cb04704047b86d3f8d11f1399aa8a8c
Author:     Gilbert Song <songzihao1...@gmail.com>
AuthorDate: Fri Oct 5 12:19:01 2018 -0700

    Added a unit test for agent recovery with new cgroup subsystems.

    Review: https://reviews.apache.org/r/68941
---
 src/tests/containerizer/cgroups_isolator_tests.cpp | 147 +++++++++++++++++++++
 1 file changed, 147 insertions(+)

diff --git a/src/tests/containerizer/cgroups_isolator_tests.cpp b/src/tests/containerizer/cgroups_isolator_tests.cpp
index 368ab93..fccab20 100644
--- a/src/tests/containerizer/cgroups_isolator_tests.cpp
+++ b/src/tests/containerizer/cgroups_isolator_tests.cpp
@@ -1904,6 +1904,153 @@ TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_AutoLoadSubsystems)
 }
 
 
+// This test verifies that, after an agent recovery/upgrade, nested
+// containers can still be launched under old containers (i.e.,
+// containers launched before the agent restart) when new cgroup
+// subsystems are added to the agent's cgroup isolation.
+TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_AgentRecoveryWithNewCgroupSubsystems)
+{
+  // Start with only the memory cgroup subsystem enabled. HTTP AuthN is
+  // disabled on the agent so that the default executor can reach the
+  // agent API without credentials.
+  slave::Flags flags = CreateSlaveFlags();
+  flags.isolation = "filesystem/linux,docker/runtime,cgroups/mem";
+  flags.image_providers = "docker";
+  flags.authenticate_http_readwrite = false;
+
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  // Start the slave with a static process ID. This allows the executor to
+  // reconnect with the slave upon a process restart.
+  const string id("agent");
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), id, flags);
+  ASSERT_SOME(slave);
+
+  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
+
+  v1::FrameworkInfo frameworkInfo = v1::DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_checkpoint(true);
+
+  EXPECT_CALL(*scheduler, connected(_))
+    .WillOnce(v1::scheduler::SendSubscribe(frameworkInfo));
+
+  Future<Event::Subscribed> subscribed;
+  EXPECT_CALL(*scheduler, subscribed(_, _))
+    .WillOnce(FutureArg<1>(&subscribed));
+
+  Future<Event::Offers> offers1;
+  EXPECT_CALL(*scheduler, offers(_, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return());
+
+  EXPECT_CALL(*scheduler, heartbeat(_))
+    .WillRepeatedly(Return()); // Ignore heartbeats.
+
+  v1::scheduler::TestMesos mesos(
+      master.get()->pid, ContentType::PROTOBUF, scheduler);
+
+  AWAIT_READY(subscribed);
+  v1::FrameworkID frameworkId(subscribed->framework_id());
+
+  v1::ExecutorInfo executorInfo = v1::createExecutorInfo(
+      "test_default_executor",
+      None(),
+      "cpus:0.1;mem:32;disk:32",
+      v1::ExecutorInfo::DEFAULT);
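+
+  // NOTE: With a DEFAULT (task group) executor, each task below runs as
+  // a nested container under the executor's container. That parent/child
+  // relationship is what this test exercises across the agent restart.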
+
+  // Update `executorInfo` with the subscribed `frameworkId`.
+  executorInfo.mutable_framework_id()->CopyFrom(frameworkId);
+
+  AWAIT_READY(offers1);
+  ASSERT_FALSE(offers1->offers().empty());
+
+  const v1::Offer& offer1 = offers1->offers(0);
+
+  v1::TaskInfo taskInfo1 = v1::createTask(
+      offer1.agent_id(),
+      v1::Resources::parse("cpus:0.1;mem:32;disk:32").get(),
+      "sleep 1000");
+
+  Future<v1::scheduler::Event::Update> startingUpdate1;
+  Future<v1::scheduler::Event::Update> runningUpdate1;
+  EXPECT_CALL(*scheduler, update(_, _))
+    .WillOnce(DoAll(
+        FutureArg<1>(&startingUpdate1),
+        v1::scheduler::SendAcknowledge(frameworkId, offer1.agent_id())))
+    .WillOnce(DoAll(
+        FutureArg<1>(&runningUpdate1),
+        v1::scheduler::SendAcknowledge(frameworkId, offer1.agent_id())))
+    .WillRepeatedly(Return());
+
+  mesos.send(
+      v1::createCallAccept(
+          frameworkId,
+          offer1,
+          {v1::LAUNCH_GROUP(
+              executorInfo, v1::createTaskGroupInfo({taskInfo1}))}));
+
+  AWAIT_READY(startingUpdate1);
+  ASSERT_EQ(v1::TASK_STARTING, startingUpdate1->status().state());
+  ASSERT_EQ(taskInfo1.task_id(), startingUpdate1->status().task_id());
+
+  AWAIT_READY(runningUpdate1);
+  ASSERT_EQ(v1::TASK_RUNNING, runningUpdate1->status().state());
+  ASSERT_EQ(taskInfo1.task_id(), runningUpdate1->status().task_id());
+
+  slave.get()->terminate();
+  slave->reset();
+
+  Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover);
+
+  // Restart the agent with an updated cgroup isolation to introduce new
+  // subsystems ('cgroups/all' loads every subsystem enabled on the host).
+  flags.isolation = "filesystem/linux,docker/runtime,cgroups/all";
+  slave = this->StartSlave(detector.get(), id, flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(__recover);
+
+  Future<Event::Offers> offers2;
+  EXPECT_CALL(*scheduler, offers(_, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return());
+
+  AWAIT_READY(offers2);
+  ASSERT_FALSE(offers2->offers().empty());
+
+  const v1::Offer& offer2 = offers2->offers(0);
+
+  v1::TaskInfo taskInfo2 = v1::createTask(
+      offer2.agent_id(),
+      v1::Resources::parse("cpus:0.1;mem:32;disk:32").get(),
+      "sleep 1000");
+
+  Future<v1::scheduler::Event::Update> startingUpdate2;
+  Future<v1::scheduler::Event::Update> runningUpdate2;
+  EXPECT_CALL(*scheduler, update(_, _))
+    .WillOnce(DoAll(
+        FutureArg<1>(&startingUpdate2),
+        v1::scheduler::SendAcknowledge(frameworkId, offer2.agent_id())))
+    .WillOnce(FutureArg<1>(&runningUpdate2));
+
+  mesos.send(
+      v1::createCallAccept(
+          frameworkId,
+          offer2,
+          {v1::LAUNCH_GROUP(
+              executorInfo, v1::createTaskGroupInfo({taskInfo2}))}));
+
+  AWAIT_READY(startingUpdate2);
+  ASSERT_EQ(v1::TASK_STARTING, startingUpdate2->status().state());
+  ASSERT_EQ(taskInfo2.task_id(), startingUpdate2->status().task_id());
+
+  AWAIT_READY(runningUpdate2);
+  ASSERT_EQ(v1::TASK_RUNNING, runningUpdate2->status().state());
+  ASSERT_EQ(taskInfo2.task_id(), runningUpdate2->status().task_id());
+}
+
+
 // This test verifies the container-specific cgroups are correctly mounted
 // inside the nested container.
 TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_NestedContainerSpecificCgroupsMount)
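
For anyone wanting to run this locally: the ROOT_CGROUPS_ prefix means the
test only runs as root on a Linux host with cgroups available. A typical
invocation from the build directory (assuming the standard mesos-tests
harness; adjust the path to your build tree) would be:

  sudo ./bin/mesos-tests.sh \
    --gtest_filter="CgroupsIsolatorTest.ROOT_CGROUPS_AgentRecoveryWithNewCgroupSubsystems"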