This is an automated email from the ASF dual-hosted git repository.

vinodkone pushed a commit to branch 1.5.x in repository https://gitbox.apache.org/repos/asf/mesos.git
commit a91b08778b96a0d6b47d96ece4c817eb6daaa0c8 Author: Vinod Kone <vinodk...@gmail.com> AuthorDate: Tue Feb 5 16:55:19 2019 -0600 Tested unreachable task behavior on agent GC. Updated `PartitionTest, RegistryGcByCount` test. This test fails without the previous patch. Review: https://reviews.apache.org/r/69909 --- src/tests/partition_tests.cpp | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp index e7f4ebe..2f121ef 100644 --- a/src/tests/partition_tests.cpp +++ b/src/tests/partition_tests.cpp @@ -2603,16 +2603,30 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(PartitionTest, RegistryGcByCount) EXPECT_CALL(sched, registered(&driver, _, _)); - Future<Nothing> resourceOffers; + Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) - .WillOnce(FutureSatisfy(&resourceOffers)) + .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Need to make sure the framework AND slave have registered with // master. Waiting for resource offers should accomplish both. - AWAIT_READY(resourceOffers); + AWAIT_READY(offers); + + TaskInfo task = createTask(offers->at(0), SLEEP_COMMAND(60)); + + Future<TaskStatus> unreachableStatus; + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .WillOnce(FutureArg<1>(&unreachableStatus)); + + // Drop RunTaskMessage so that the task stays in TASK_STAGING. + Future<RunTaskMessage> runTaskMessage = + DROP_PROTOBUF(RunTaskMessage(), _, _); + + driver.launchTasks(offers->at(0).id(), {task}); + + AWAIT_READY(runTaskMessage); EXPECT_CALL(sched, offerRescinded(&driver, _)) .WillRepeatedly(Return()); @@ -2644,6 +2658,8 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(PartitionTest, RegistryGcByCount) Clock::advance(Milliseconds(100)); + AWAIT_READY(unreachableStatus); + AWAIT_READY(slaveLost1); // Shutdown the first slave. 
This is necessary because we only drop @@ -2701,13 +2717,13 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(PartitionTest, RegistryGcByCount) slave2.get()->terminate(); slave2->reset(); - // Do explicit reconciliation for a random task ID on `slave1`. GC + // Do explicit reconciliation for the task launched on `slave1`. GC // has not occurred yet (since `registry_gc_interval` has not // elapsed since the master was started), so the slave should be in // the unreachable list; hence `unreachable_time` should be set on // the result of the reconciliation request. TaskStatus status1; - status1.mutable_task_id()->set_value(id::UUID::random().toString()); + status1.mutable_task_id()->CopyFrom(task.task_id()); status1.mutable_slave_id()->CopyFrom(slaveId1); status1.set_state(TASK_STAGING); // Dummy value. @@ -2722,15 +2738,23 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(PartitionTest, RegistryGcByCount) EXPECT_EQ(TaskStatus::REASON_RECONCILIATION, reconcileUpdate1->reason()); EXPECT_EQ(partitionTime1, reconcileUpdate1->unreachable_time()); + JSON::Object stats = Metrics(); + EXPECT_EQ(1, stats.values["master/tasks_unreachable"]); + // Advance the clock to cause GC to be performed. Clock::advance(masterFlags.registry_gc_interval); Clock::settle(); - // Do explicit reconciliation for a random task ID on `slave1`. + // Ensure task has been removed from unreachable list once the + // agent is GC'ed. + stats = Metrics(); + EXPECT_EQ(0, stats.values["master/tasks_unreachable"]); + + // Do explicit reconciliation for the task launched on `slave1`. // Because the agent has been removed from the unreachable list in // the registry, `unreachable_time` should NOT be set. TaskStatus status2; - status2.mutable_task_id()->set_value(id::UUID::random().toString()); + status2.mutable_task_id()->CopyFrom(task.task_id()); status2.mutable_slave_id()->CopyFrom(slaveId1); status2.set_state(TASK_STAGING); // Dummy value. 
@@ -2764,7 +2788,7 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(PartitionTest, RegistryGcByCount) EXPECT_EQ(TaskStatus::REASON_RECONCILIATION, reconcileUpdate3->reason()); EXPECT_EQ(partitionTime2, reconcileUpdate3->unreachable_time()); - JSON::Object stats = Metrics(); + stats = Metrics(); EXPECT_EQ(2, stats.values["master/slave_unreachable_scheduled"]); EXPECT_EQ(2, stats.values["master/slave_unreachable_completed"]); EXPECT_EQ(2, stats.values["master/slave_removals"]);