This is an automated email from the ASF dual-hosted git repository. josephwu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 2f0ed6dcd7762eee52d0152d09210b138ae8628a Author: Joseph Wu <josep...@apache.org> AuthorDate: Tue Feb 12 14:19:05 2019 -0800 Added cleanup logic for orphaned operations. The resources used by orphaned operations are not accounted for in the Slave struct's `usedResources`, and must be treated differently when cleaning up the operation. Removal of non-terminal orphan operations will instead augment the `totalResources`. NOTE: The only codepath that can remove non-terminal operations is when removing the agent or a resource provider. In this case, there is no need to update the allocator with the augmented `totalResources` because the removal codepaths will already update the allocator. Review: https://reviews.apache.org/r/69962 --- src/master/master.cpp | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/master/master.cpp b/src/master/master.cpp index da970a5..54f96f9 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -11697,8 +11697,11 @@ void Master::removeOperation(Operation* operation) // If the operation was not speculated and is not terminal we // need to also recover its used resources in the allocator. + // If the operation is an orphan, the resources have already been + // recovered from the allocator. if (!protobuf::isSpeculativeOperation(operation->info()) && - !protobuf::isTerminalState(operation->latest_status().state())) { + !protobuf::isTerminalState(operation->latest_status().state()) && + !slave->orphanedOperations.contains(operation->uuid())) { Try<Resources> consumed = protobuf::getConsumedResources(operation->info()); CHECK_SOME(consumed); @@ -12792,10 +12795,39 @@ void Slave::removeOperation(Operation* operation) CHECK(!resourceProviderId.isError()) << resourceProviderId.error(); - // Recover the resource used by this operation. - if (!protobuf::isSpeculativeOperation(operation->info()) && - !protobuf::isTerminalState(operation->latest_status().state())) { - recoverResources(operation); + if (orphanedOperations.contains(uuid)) { + orphanedOperations.erase(uuid); + + CHECK(!protobuf::isSpeculativeOperation(operation->info())) + << "Orphaned operations can only be non-speculative"; + + // If the orphan is removed before reaching a terminal state, + // the used resources must be added back into the agent's total + // resources. Terminal orphaned operations are handled by + // `Master::updateOperation`. + if (!protobuf::isTerminalState(operation->latest_status().state())) { + Try<Resources> consumed = + protobuf::getConsumedResources(operation->info()); + + CHECK_SOME(consumed); + + Resources consumedUnallocated = consumed.get(); + consumedUnallocated.unallocate(); + + // NOTE: Non-terminal operations are only removed when the agent or + // resource provider is removed. When the agent is removed, the master + // will call `allocator->removeSlave` in `Master::_removeSlave()`. + // When a resource provider is removed, the master will call + // `allocator->updateSlave()` in `Master::updateSlave()`. + // This means we do not need to update the allocator in this method. + totalResources += consumedUnallocated; + } + } else { + // Recover the resource used by this operation. + if (!protobuf::isSpeculativeOperation(operation->info()) && + !protobuf::isTerminalState(operation->latest_status().state())) { + recoverResources(operation); + } } // Remove the operation. @@ -12804,7 +12836,7 @@ void Slave::removeOperation(Operation* operation) << "Unknown operation (uuid: " << uuid << ")" << " to agent " << *this; - operations.erase(operation->uuid()); + operations.erase(uuid); } else { CHECK(resourceProviders.contains(resourceProviderId.get())) << "resource provider " << resourceProviderId.get() << " is unknown"; @@ -12817,7 +12849,7 @@ void Slave::removeOperation(Operation* operation) << " to resource provider " << resourceProviderId.get() << " on agent " << *this; - resourceProvider.operations.erase(operation->uuid()); + resourceProvider.operations.erase(uuid); } }