Added benchmarks for new registry operations. These benchmarks cover:
1. The time taken to make all registered agents unreachable and then reachable. This is similar to what happens during a severe network partition. 2. The time taken to GC a significant fraction (50%) of the unreachable list from the registry. Review: https://reviews.apache.org/r/51909/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/aae81968 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/aae81968 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/aae81968 Branch: refs/heads/master Commit: aae819684f61f4cb811e0629731a57bdd6f8f3fe Parents: 47bd3e4 Author: Neil Conway <neil.con...@gmail.com> Authored: Mon Sep 19 15:49:04 2016 -0700 Committer: Vinod Kone <vinodk...@gmail.com> Committed: Mon Sep 19 15:49:04 2016 -0700 ---------------------------------------------------------------------- src/tests/registrar_tests.cpp | 141 ++++++++++++++++++++++++++++++++++++- 1 file changed, 139 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/aae81968/src/tests/registrar_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/registrar_tests.cpp b/src/tests/registrar_tests.cpp index 745cded..928bc66 100644 --- a/src/tests/registrar_tests.cpp +++ b/src/tests/registrar_tests.cpp @@ -1263,8 +1263,6 @@ TEST_P(Registrar_BENCHMARK_Test, Performance) Registrar registrar(flags, state); AWAIT_READY(registrar.recover(master)); - vector<SlaveInfo> infos; - Attributes attributes = Attributes::parse("foo:bar;baz:quux"); Resources resources = Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get(); @@ -1272,6 +1270,7 @@ TEST_P(Registrar_BENCHMARK_Test, Performance) size_t slaveCount = GetParam(); // Create slaves. + vector<SlaveInfo> infos; for (size_t i = 0; i < slaveCount; ++i) { // Simulate real slave information. 
SlaveInfo info; @@ -1333,6 +1332,144 @@ TEST_P(Registrar_BENCHMARK_Test, Performance) cout << "Removed " << slaveCount << " agents in " << watch.elapsed() << endl; } + +// Test the performance of marking all registered slaves unreachable, +// then marking them reachable again. This might occur if there is a +// network partition and then the partition heals. +TEST_P(Registrar_BENCHMARK_Test, MarkUnreachableThenReachable) +{ + Registrar registrar(flags, state); + AWAIT_READY(registrar.recover(master)); + + Attributes attributes = Attributes::parse("foo:bar;baz:quux"); + Resources resources = + Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get(); + + size_t slaveCount = GetParam(); + + // Create slaves. + vector<SlaveInfo> infos; + for (size_t i = 0; i < slaveCount; ++i) { + // Simulate real slave information. + SlaveInfo info; + info.set_hostname("localhost"); + info.mutable_id()->set_value( + string("201310101658-2280333834-5050-48574-") + stringify(i)); + info.mutable_resources()->MergeFrom(resources); + info.mutable_attributes()->MergeFrom(attributes); + infos.push_back(info); + } + + // Admit slaves. + Stopwatch watch; + watch.start(); + Future<bool> result; + foreach (const SlaveInfo& info, infos) { + result = registrar.apply(Owned<Operation>(new AdmitSlave(info))); + } + AWAIT_READY_FOR(result, Minutes(5)); + LOG(INFO) << "Admitted " << slaveCount << " agents in " << watch.elapsed(); + + // Shuffle the slaves so that we mark them unreachable in random + // order (same as in production). + std::random_shuffle(infos.begin(), infos.end()); + + // Mark all slaves unreachable. 
+ TimeInfo unreachableTime = protobuf::getCurrentTime(); + + watch.start(); + foreach (const SlaveInfo& info, infos) { + result = registrar.apply( + Owned<Operation>(new MarkSlaveUnreachable(info, unreachableTime))); + } + AWAIT_READY_FOR(result, Minutes(5)); + cout << "Marked " << slaveCount << " agents unreachable in " + << watch.elapsed() << endl; + + // Shuffle the slaves again so that we mark them reachable in + // random order (same as in production). + std::random_shuffle(infos.begin(), infos.end()); + + // Mark all slaves reachable. + watch.start(); + foreach (const SlaveInfo& info, infos) { + result = registrar.apply( + Owned<Operation>(new MarkSlaveReachable(info))); + } + AWAIT_READY_FOR(result, Minutes(5)); + cout << "Marked " << slaveCount << " agents reachable in " + << watch.elapsed() << endl; +} + + +// Test the performance of garbage collecting a large portion of the +// unreachable list in a single operation. We use a fixed percentage +// at the moment (50%). +TEST_P(Registrar_BENCHMARK_Test, GcManyAgents) +{ + Registrar registrar(flags, state); + AWAIT_READY(registrar.recover(master)); + + Attributes attributes = Attributes::parse("foo:bar;baz:quux"); + Resources resources = + Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get(); + + size_t slaveCount = GetParam(); + + // Create slaves. + vector<SlaveInfo> infos; + for (size_t i = 0; i < slaveCount; ++i) { + // Simulate real slave information. + SlaveInfo info; + info.set_hostname("localhost"); + info.mutable_id()->set_value( + string("201310101658-2280333834-5050-48574-") + stringify(i)); + info.mutable_resources()->MergeFrom(resources); + info.mutable_attributes()->MergeFrom(attributes); + infos.push_back(info); + } + + // Admit slaves. 
+ Stopwatch watch; + watch.start(); + Future<bool> result; + foreach (const SlaveInfo& info, infos) { + result = registrar.apply(Owned<Operation>(new AdmitSlave(info))); + } + AWAIT_READY_FOR(result, Minutes(5)); + LOG(INFO) << "Admitted " << slaveCount << " agents in " << watch.elapsed(); + + // Shuffle the slaves so that we mark them unreachable in random + // order (same as in production). + std::random_shuffle(infos.begin(), infos.end()); + + // Mark all slaves unreachable. + TimeInfo unreachableTime = protobuf::getCurrentTime(); + + watch.start(); + foreach (const SlaveInfo& info, infos) { + result = registrar.apply( + Owned<Operation>(new MarkSlaveUnreachable(info, unreachableTime))); + } + AWAIT_READY_FOR(result, Minutes(5)); + LOG(INFO) << "Marked " << slaveCount << " agents unreachable in " + << watch.elapsed() << endl; + + // Prepare to GC the first half of the unreachable list. + hashset<SlaveID> toRemove; + for (size_t i = 0; (i * 2) < slaveCount; i++) { + const SlaveInfo& info = infos[i]; + toRemove.insert(info.id()); + } + + // Do GC. + watch.start(); + result = registrar.apply(Owned<Operation>(new PruneUnreachable(toRemove))); + AWAIT_READY_FOR(result, Minutes(5)); + cout << "Garbage collected " << toRemove.size() << " agents in " + << watch.elapsed() << endl; +} + } // namespace tests { } // namespace internal { } // namespace mesos {