Repository: mesos Updated Branches: refs/heads/1.0.x d6d1406a9 -> 6e503351a
Fixed master to properly handle pending tasks. NOTE: This backported patch has been modified. Pending tasks are always removed from frameworks `pending` map, irrespective of whether the task launch is successful or not. Review: https://reviews.apache.org/r/52440 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/660a42f7 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/660a42f7 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/660a42f7 Branch: refs/heads/1.0.x Commit: 660a42f73cdd2a6101808e3e9fe79929e26ddc7e Parents: d6d1406 Author: Vinod Kone <vinodk...@gmail.com> Authored: Fri Sep 30 12:38:01 2016 -0700 Committer: Vinod Kone <vinodk...@gmail.com> Committed: Fri Sep 30 14:57:25 2016 -0700 ---------------------------------------------------------------------- src/master/master.cpp | 3 ++ src/tests/master_authorization_tests.cpp | 50 +++++++++++++++++++++++++-- src/tests/master_validation_tests.cpp | 11 ++++++ 3 files changed, 61 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/660a42f7/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 8def6b3..25c6022 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -3472,6 +3472,9 @@ void Master::_accept( } foreach (const TaskInfo& task, operation.launch().task_infos()) { + // Remove the task from being pending. + framework->pendingTasks.erase(task.task_id()); + const TaskStatus::Reason reason = slave == nullptr ? TaskStatus::REASON_SLAVE_REMOVED : TaskStatus::REASON_SLAVE_DISCONNECTED; http://git-wip-us.apache.org/repos/asf/mesos/blob/660a42f7/src/tests/master_authorization_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_authorization_tests.cpp b/src/tests/master_authorization_tests.cpp index e43b264..66c2606 100644 --- a/src/tests/master_authorization_tests.cpp +++ b/src/tests/master_authorization_tests.cpp @@ -227,13 +227,24 @@ TEST_F(MasterAuthorizationTest, UnauthorizedTask) EXPECT_EQ(TASK_ERROR, status.get().state()); EXPECT_EQ(TaskStatus::REASON_TASK_UNAUTHORIZED, status.get().reason()); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); } // This test verifies that a 'killTask()' that comes before -// '_launchTasks()' is called results in TASK_KILLED. +// '_accept()' is called results in TASK_KILLED. TEST_F(MasterAuthorizationTest, KillTask) { MockAuthorizer authorizer; @@ -299,13 +310,24 @@ TEST_F(MasterAuthorizationTest, KillTask) // returned to the allocator. AWAIT_READY(recoverResources); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); } // This test verifies that a slave removal that comes before -// '_launchTasks()' is called results in TASK_LOST. +// '_accept()' is called results in TASK_LOST. TEST_F(MasterAuthorizationTest, SlaveRemoved) { MockAuthorizer authorizer; @@ -388,6 +410,17 @@ TEST_F(MasterAuthorizationTest, SlaveRemoved) EXPECT_EQ( 1u, stats.values["master/task_lost/source_master/reason_slave_removed"]); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); } @@ -481,13 +514,24 @@ TEST_F(MasterAuthorizationTest, SlaveDisconnected) 1u, stats.values["master/task_lost/source_master/reason_slave_removed"]); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); } // This test verifies that a framework removal that comes before -// '_launchTasks()' is called results in recovery of resources. +// '_accept()' is called results in recovery of resources. TEST_F(MasterAuthorizationTest, FrameworkRemoved) { MockAuthorizer authorizer; http://git-wip-us.apache.org/repos/asf/mesos/blob/660a42f7/src/tests/master_validation_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_validation_tests.cpp b/src/tests/master_validation_tests.cpp index 9eb82a5..3ee890e 100644 --- a/src/tests/master_validation_tests.cpp +++ b/src/tests/master_validation_tests.cpp @@ -652,6 +652,17 @@ TEST_F(TaskValidationTest, TaskUsesInvalidFrameworkID) EXPECT_TRUE(strings::startsWith( status.get().message(), "ExecutorInfo has an invalid FrameworkID")); + // Make sure the task is not known to master anymore. + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .Times(0); + + driver.reconcileTasks({}); + + // We pause the clock here to ensure any updates sent by the master + // are received. There shouldn't be any updates in this case. + Clock::pause(); + Clock::settle(); + driver.stop(); driver.join(); }