Changed master to add `FrameworkInfo` to agent reconciliation. When an agent re-registers with a master that already knows about that agent, the master needs to reconcile its view of the state of the agent with the agent's current state. For any task that the master thinks should be on the agent but isn't included in the ReregisterSlaveMessage, the master does a reconciliation with the agent to find the task's current state.
This commit adds the `FrameworkInfo` for any possibly missing tasks to the master -> agent reconciliation message. This is useful because the agent can consult the `FrameworkInfo` during reconciliation: this will shortly be used to make agent reconciliation behave differently for partition-aware frameworks. Review: https://reviews.apache.org/r/52722/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b7d8c29c Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b7d8c29c Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b7d8c29c Branch: refs/heads/master Commit: b7d8c29cdcb29a3a0ca1551e73745cd28c796729 Parents: 9225ac2 Author: Neil Conway <neil.con...@gmail.com> Authored: Fri Oct 21 14:13:08 2016 -0700 Committer: Vinod Kone <vinodk...@gmail.com> Committed: Fri Oct 21 14:13:08 2016 -0700 ---------------------------------------------------------------------- src/master/master.cpp | 15 ++++++++++++++- src/messages/messages.proto | 27 +++++++++++++++++++++------ 2 files changed, 35 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/b7d8c29c/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 85cab42..a52adb3 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -6804,7 +6804,6 @@ void Master::reconcileKnownSlave( foreachkey (const FrameworkID& frameworkId, slave->tasks) { ReconcileTasksMessage reconcile; - reconcile.mutable_framework_id()->CopyFrom(frameworkId); foreachvalue (Task* task, slave->tasks[frameworkId]) { if (!slaveTasks.contains(task->framework_id(), task->task_id())) { @@ -6832,6 +6831,20 @@ void Master::reconcileKnownSlave( } if (reconcile.statuses_size() > 0) { + // NOTE: This function is only invoked when a slave reregisters + // with a master that 
previously knew about the slave and has + // not marked it unreachable. If the master has any tasks for + // the agent that are not known to the agent itself, it MUST + // have the FrameworkInfo for those tasks. This is because if a + // master has a task that the agent doesn't know about, the + // framework must have reregistered with this master since the + // last master failover. + Framework* framework = getFramework(frameworkId); + CHECK_NOTNULL(framework); + + reconcile.mutable_framework_id()->CopyFrom(frameworkId); + reconcile.mutable_framework()->CopyFrom(framework->info); + reregistered.add_reconciliations()->CopyFrom(reconcile); } } http://git-wip-us.apache.org/repos/asf/mesos/blob/b7d8c29c/src/messages/messages.proto ---------------------------------------------------------------------- diff --git a/src/messages/messages.proto b/src/messages/messages.proto index 7d65be1..4e51236 100644 --- a/src/messages/messages.proto +++ b/src/messages/messages.proto @@ -371,16 +371,31 @@ message LostSlaveMessage { /** - * Allows the scheduler to query the status for non-terminal tasks. - * This causes the master to send back the latest task status for - * each task in `statuses`, if possible. Tasks that are no longer - * known will result in a `TASK_LOST` update. If `statuses` is empty, - * then the master will send the latest status for each task - * currently known. + * This message is used in two situations: + * + * (a) schedulers can query masters about the master's view of the + * state of one or more tasks. If the `statuses` field is empty + * ("implicit reconciliation"), the master will respond with + * status updates for all of the non-terminal tasks it knows + * about. + * + * (b) the master can query an agent about the agent's view of the + * state of one or more tasks. + * + * In both cases, the response to this message is returned via zero or + * more status update messages with the `reason` field set to + * `REASON_RECONCILIATION`. 
*/ message ReconcileTasksMessage { required FrameworkID framework_id = 1; repeated TaskStatus statuses = 2; // Should be non-terminal only. + + // Should only be set for reconciliation requests sent to agents by + // the master (case (b) above). This is necessary because the agent + // might not know anything about the framework, and the correct + // response to the reconciliation request might depend on the + // framework's capabilities (e.g., PARTITION_AWARE). + optional FrameworkInfo framework = 3; }