Repository: mesos Updated Branches: refs/heads/master 89b43e003 -> 02f13c9aa
Exposed metrics in scheduler library. Exposed metrics scheduler/event_queue_messages(size of message queue) and scheduler/event_queue_dispatches (size of dispatch queue) in scheduler library. Review: https://reviews.apache.org/r/51501/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/02f13c9a Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/02f13c9a Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/02f13c9a Branch: refs/heads/master Commit: 02f13c9aaa87cf7d0f3f6596edc98d8726ba0d35 Parents: 89b43e0 Author: Abhishek Dasgupta <a10gu...@linux.vnet.ibm.com> Authored: Fri Sep 9 09:52:49 2016 +0200 Committer: Vinod Kone <vinodk...@gmail.com> Committed: Fri Sep 9 09:52:49 2016 +0200 ---------------------------------------------------------------------- src/scheduler/scheduler.cpp | 44 +++++++++++++++++++++++++ src/tests/scheduler_tests.cpp | 67 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/02f13c9a/src/scheduler/scheduler.cpp ---------------------------------------------------------------------- diff --git a/src/scheduler/scheduler.cpp b/src/scheduler/scheduler.cpp index 276ed10..2ed6ce2 100644 --- a/src/scheduler/scheduler.cpp +++ b/src/scheduler/scheduler.cpp @@ -48,6 +48,9 @@ #include <process/process.hpp> #include <process/protobuf.hpp> +#include <process/metrics/gauge.hpp> +#include <process/metrics/metrics.hpp> + #include <stout/check.hpp> #include <stout/duration.hpp> #include <stout/error.hpp> @@ -141,6 +144,7 @@ public: const Flags& _flags) : ProcessBase(ID::generate("scheduler")), state(DISCONNECTED), + metrics(*this), contentType(_contentType), callbacks {connected, disconnected, received}, credential(_credential), @@ -711,6 +715,46 @@ private: UNREACHABLE(); } + struct Metrics + { + Metrics(const MesosProcess& mesosProcess) + : 
event_queue_messages( + "scheduler/event_queue_messages", + defer(mesosProcess, &MesosProcess::_event_queue_messages)), + event_queue_dispatches( + "scheduler/event_queue_dispatches", + defer(mesosProcess, + &MesosProcess::_event_queue_dispatches)) + { + // TODO(dhamon): When we start checking the return value of 'add' we may + // get failures in situations where multiple SchedulerProcesses are active + // (i.e., the fault tolerance tests). At that point we'll need MESOS-1285 to + // be fixed and to use self().id in the metric name. + process::metrics::add(event_queue_messages); + process::metrics::add(event_queue_dispatches); + } + + ~Metrics() + { + process::metrics::remove(event_queue_messages); + process::metrics::remove(event_queue_dispatches); + } + + // Process metrics. + process::metrics::Gauge event_queue_messages; + process::metrics::Gauge event_queue_dispatches; + } metrics; + + double _event_queue_messages() + { + return static_cast<double>(eventCount<MessageEvent>()); + } + + double _event_queue_dispatches() + { + return static_cast<double>(eventCount<DispatchEvent>()); + } + // There can be multiple simultaneous ongoing (re-)connection attempts with // the master (e.g., the master failed over while an attempt was in progress). 
// This helps us in uniquely identifying the current connection instance and http://git-wip-us.apache.org/repos/asf/mesos/blob/02f13c9a/src/tests/scheduler_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/scheduler_tests.cpp b/src/tests/scheduler_tests.cpp index 931c185..b0ea0bb 100644 --- a/src/tests/scheduler_tests.cpp +++ b/src/tests/scheduler_tests.cpp @@ -36,6 +36,8 @@ #include <process/pid.hpp> #include <process/queue.hpp> +#include <process/metrics/metrics.hpp> + #include <stout/lambda.hpp> #include <stout/try.hpp> @@ -71,6 +73,10 @@ using process::Owned; using process::PID; using process::Queue; +using process::http::OK; + +using process::metrics::internal::MetricsProcess; + using std::cout; using std::endl; using std::string; @@ -312,6 +318,67 @@ TEST_P(SchedulerTest, MasterFailover) } +// This test verifies that the scheduler library also exposes metrics, like +// the scheduler driver does. +TEST_P(SchedulerTest, MetricsEndpoint) +{ + Try<Owned<cluster::Master>> master = StartMaster(); + ASSERT_SOME(master); + + auto scheduler = std::make_shared<MockV1HTTPScheduler>(); + + Future<Nothing> connected; + EXPECT_CALL(*scheduler, connected(_)) + .WillOnce(FutureSatisfy(&connected)); + + ContentType contentType = GetParam(); + + scheduler::TestV1Mesos mesos(master.get()->pid, contentType, scheduler); + + AWAIT_READY(connected); + + Future<Event::Subscribed> subscribed; + EXPECT_CALL(*scheduler, subscribed(_, _)) + .WillOnce(FutureArg<1>(&subscribed)); + + EXPECT_CALL(*scheduler, heartbeat(_)) + .WillRepeatedly(Return()); // Ignore heartbeats. 
+ + { + Call call; + call.set_type(Call::SUBSCRIBE); + Call::Subscribe* subscribe = call.mutable_subscribe(); + subscribe->mutable_framework_info()->CopyFrom(DEFAULT_V1_FRAMEWORK_INFO); + + mesos.send(call); + } + + AWAIT_READY(subscribed); + + Future<process::http::Response> response = + process::http::get(MetricsProcess::instance()->self(), "snapshot"); + + AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); + AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response); + + Try<JSON::Object> parse = JSON::parse<JSON::Object>(response.get().body); + + ASSERT_SOME(parse); + + JSON::Object metrics = parse.get(); + + // The "scheduler/event_queue_messages" metric reports the number of message + // events in the event queue. Message events are invoked when any custom + // message is generated by the executor. + EXPECT_EQ(1u, metrics.values.count("scheduler/event_queue_messages")); + + // The "scheduler/event_queue_dispatches" metric reports the number of + // dispatch events in the event queue. Dispatch events are invoked when any + // function is dispatched to the process as a result of any call made by the + // scheduler. + EXPECT_EQ(1u, metrics.values.count("scheduler/event_queue_dispatches")); +} + + TEST_P(SchedulerTest, TaskRunning) { Try<Owned<cluster::Master>> master = StartMaster();