Removed a no-longer-relevant test. The behavior this test validates (slaves receive a `ShutdownMessage` if they attempt to reregister after failing health checks) will change shortly, and the new behavior is already covered by other test cases.
Review: https://reviews.apache.org/r/50703/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0b90ccca Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0b90ccca Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0b90ccca Branch: refs/heads/master Commit: 0b90cccaca0069a2e2fff54d1424d205659346a3 Parents: 93016d3 Author: Neil Conway <neil.con...@gmail.com> Authored: Fri Aug 26 14:48:39 2016 -0700 Committer: Vinod Kone <vinodk...@gmail.com> Committed: Fri Aug 26 14:49:49 2016 -0700 ---------------------------------------------------------------------- src/tests/slave_recovery_tests.cpp | 142 -------------------------------- 1 file changed, 142 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/0b90ccca/src/tests/slave_recovery_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp index 9ff19f4..3c69e56 100644 --- a/src/tests/slave_recovery_tests.cpp +++ b/src/tests/slave_recovery_tests.cpp @@ -3186,148 +3186,6 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover) } -// The purpose of this test is to ensure that during a network -// partition, the master will remove a partitioned slave. When the -// partition is removed, the slave will receive a ShutdownMessage. -// When the slave starts again on the same host, we verify that the -// slave will not try to reregister itself with the master. It will -// register itself with the master and get a new slave id. -TYPED_TEST(SlaveRecoveryTest, PartitionedSlave) -{ - master::Flags masterFlags = this->CreateMasterFlags(); - Try<Owned<cluster::Master>> master = this->StartMaster(masterFlags); - ASSERT_SOME(master); - - // Set these expectations up before we spawn the slave so that we - // don't miss the first PING. 
- Future<Message> ping = FUTURE_MESSAGE( - Eq(PingSlaveMessage().GetTypeName()), _, _); - - // Drop all the PONGs to simulate slave partition. - DROP_PROTOBUFS(PongSlaveMessage(), _, _); - - slave::Flags flags = this->CreateSlaveFlags(); - - Fetcher fetcher; - - Try<TypeParam*> _containerizer = TypeParam::create(flags, true, &fetcher); - ASSERT_SOME(_containerizer); - Owned<slave::Containerizer> containerizer(_containerizer.get()); - - Owned<MasterDetector> detector = master.get()->createDetector(); - - Try<Owned<cluster::Slave>> slave = - this->StartSlave(detector.get(), containerizer.get(), flags); - ASSERT_SOME(slave); - - // Enable checkpointing for the framework. - FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; - frameworkInfo.set_checkpoint(true); - - MockScheduler sched; - MesosSchedulerDriver driver( - &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL); - - EXPECT_CALL(sched, registered(_, _, _)); - - Future<vector<Offer>> offers; - EXPECT_CALL(sched, resourceOffers(&driver, _)) - .WillOnce(FutureArg<1>(&offers)) - .WillRepeatedly(Return()); - - driver.start(); - - AWAIT_READY(offers); - EXPECT_NE(0u, offers.get().size()); - - // Long running task. - TaskInfo task = createTask(offers.get()[0], "sleep 1000"); - - EXPECT_CALL(sched, statusUpdate(_, _)); - - Future<Nothing> _statusUpdateAcknowledgement = - FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement); - - driver.launchTasks(offers.get()[0].id(), {task}); - - // Wait for the ACK to be checkpointed. 
- AWAIT_READY(_statusUpdateAcknowledgement); - - Future<ShutdownMessage> shutdownMessage = - FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); - - Future<Nothing> slaveLost; - EXPECT_CALL(sched, slaveLost(&driver, _)) - .WillOnce(FutureSatisfy(&slaveLost)); - - Future<TaskStatus> status; - EXPECT_CALL(sched, statusUpdate(&driver, _)) - .WillOnce(FutureArg<1>(&status)); - - Future<Nothing> executorTerminated = - FUTURE_DISPATCH(_, &Slave::executorTerminated); - - Clock::pause(); - - // Now, induce a partition of the slave by having the master - // timeout the slave. - size_t pings = 0; - while (true) { - AWAIT_READY(ping); - pings++; - if (pings == masterFlags.max_agent_ping_timeouts) { - break; - } - ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); - Clock::advance(masterFlags.agent_ping_timeout); - } - - Clock::advance(masterFlags.agent_ping_timeout); - - // The master will notify the framework that the slave was lost. - AWAIT_READY(slaveLost); - - // The master will have notified the framework of the lost task. - AWAIT_READY(status); - - EXPECT_EQ(TASK_LOST, status.get().state()); - EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source()); - EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status.get().reason()); - - // Wait for the master to attempt to shut down the slave. - AWAIT_READY(shutdownMessage); - - // Wait for the executor to be terminated. - while (executorTerminated.isPending()) { - Clock::advance(process::MAX_REAP_INTERVAL()); - Clock::settle(); - } - - AWAIT_READY(executorTerminated); - Clock::settle(); - - Clock::resume(); - - slave.get()->terminate(); - - Future<RegisterSlaveMessage> registerSlaveMessage = - FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _); - - // Restart the slave (use same flags) with a new isolator. 
- _containerizer = TypeParam::create(flags, true, &fetcher); - ASSERT_SOME(_containerizer); - containerizer.reset(_containerizer.get()); - - slave = this->StartSlave(detector.get(), containerizer.get(), flags); - ASSERT_SOME(slave); - - AWAIT_READY(registerSlaveMessage); - - driver.stop(); - driver.join(); -} - - // This test verifies that if the master changes when the slave is // down, the slave can still recover the task when it restarts. We // verify its correctness by killing the task from the scheduler.