Repository: mesos Updated Branches: refs/heads/0.28.x d01e3f04c -> a429a3688
Fixed master to properly handle pending tasks. NOTE: This backported patch has been modified. Pending tasks are always removed from frameworks `pending` map, irrespective of whether the task launch is successful or not. Review: https://reviews.apache.org/r/52440 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8949e277 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8949e277 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8949e277 Branch: refs/heads/0.28.x Commit: 8949e2775473bd4b064744a05ab49bbca7142c48 Parents: d01e3f0 Author: Vinod Kone <vinodk...@gmail.com> Authored: Fri Sep 30 12:38:01 2016 -0700 Committer: Vinod Kone <vinodk...@gmail.com> Committed: Fri Sep 30 15:00:44 2016 -0700 ---------------------------------------------------------------------- src/master/master.cpp | 3 ++ src/tests/master_authorization_tests.cpp | 50 +++++++++++++++++++++++++-- src/tests/master_validation_tests.cpp | 11 ++++++ 3 files changed, 61 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/8949e277/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 07f4751..1b09b44 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -3234,6 +3234,9 @@ void Master::_accept( } foreach (const TaskInfo& task, operation.launch().task_infos()) { + // Remove the task from being pending. + framework->pendingTasks.erase(task.task_id()); + const TaskStatus::Reason reason = slave == NULL ? TaskStatus::REASON_SLAVE_REMOVED : TaskStatus::REASON_SLAVE_DISCONNECTED; http://git-wip-us.apache.org/repos/asf/mesos/blob/8949e277/src/tests/master_authorization_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_authorization_tests.cpp b/src/tests/master_authorization_tests.cpp index 29c89fb..1b3e23a 100644 --- a/src/tests/master_authorization_tests.cpp +++ b/src/tests/master_authorization_tests.cpp @@ -202,6 +202,17 @@ TEST_F(MasterAuthorizationTest, UnauthorizedTask) EXPECT_EQ(TASK_ERROR, status.get().state()); EXPECT_EQ(TaskStatus::REASON_TASK_UNAUTHORIZED, status.get().reason()); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); @@ -210,7 +221,7 @@ TEST_F(MasterAuthorizationTest, UnauthorizedTask) // This test verifies that a 'killTask()' that comes before -// '_launchTasks()' is called results in TASK_KILLED. +// '_accept()' is called results in TASK_KILLED. TEST_F(MasterAuthorizationTest, KillTask) { MockAuthorizer authorizer; @@ -274,6 +285,17 @@ TEST_F(MasterAuthorizationTest, KillTask) // returned to the allocator. AWAIT_READY(recoverResources); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); @@ -282,7 +304,7 @@ TEST_F(MasterAuthorizationTest, KillTask) // This test verifies that a slave removal that comes before -// '_launchTasks()' is called results in TASK_LOST. +// '_accept()' is called results in TASK_LOST. TEST_F(MasterAuthorizationTest, SlaveRemoved) { MockAuthorizer authorizer; @@ -362,6 +384,17 @@ TEST_F(MasterAuthorizationTest, SlaveRemoved) EXPECT_EQ( 1u, stats.values["master/task_lost/source_master/reason_slave_removed"]); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); @@ -454,6 +487,17 @@ TEST_F(MasterAuthorizationTest, SlaveDisconnected) 1u, stats.values["master/task_lost/source_master/reason_slave_removed"]); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); @@ -462,7 +506,7 @@ TEST_F(MasterAuthorizationTest, SlaveDisconnected) // This test verifies that a framework removal that comes before -// '_launchTasks()' is called results in recovery of resources. +// '_accept()' is called results in recovery of resources. TEST_F(MasterAuthorizationTest, FrameworkRemoved) { MockAuthorizer authorizer; http://git-wip-us.apache.org/repos/asf/mesos/blob/8949e277/src/tests/master_validation_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_validation_tests.cpp b/src/tests/master_validation_tests.cpp index c9bc38c..6664067 100644 --- a/src/tests/master_validation_tests.cpp +++ b/src/tests/master_validation_tests.cpp @@ -629,6 +629,17 @@ TEST_F(TaskValidationTest, TaskUsesInvalidFrameworkID) EXPECT_TRUE(strings::startsWith( status.get().message(), "ExecutorInfo has an invalid FrameworkID")); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join();