On Tue, Nov 10, 2015 at 3:56 PM, 'Oleg Ponomarev' via ganeti-devel <[email protected]> wrote:
> All the errors during an opcode execution are reported via exceptions. > errors encountered? > Except all the exceptions and execute POST global hooks with FAILED > Intercept all the exceptions > status. > > Signed-off-by: Oleg Ponomarev <[email protected]> > --- > lib/hooksmaster.py | 4 +-- > lib/mcpu.py | 73 > +++++++++++++++++++++++++++++++----------------------- > 2 files changed, 44 insertions(+), 33 deletions(-) > > diff --git a/lib/hooksmaster.py b/lib/hooksmaster.py > index 7c0f037..b092c2c 100644 > --- a/lib/hooksmaster.py > +++ b/lib/hooksmaster.py > @@ -39,7 +39,7 @@ from ganeti import compat > from ganeti import pathutils > > > -def _RpcResultsToHooksResults(rpc_results): > +def RpcResultsToHooksResults(rpc_results): > """Function to convert RPC results to the format expected by > HooksMaster. > > @type rpc_results: dict(node: L{rpc.RpcResult}) > @@ -308,6 +308,6 @@ class HooksMaster(object): > cluster_name = lu.cfg.GetClusterName() > > return HooksMaster(lu.op.OP_ID, lu.HPATH, nodes, hooks_execution_fn, > - _RpcResultsToHooksResults, lu.BuildHooksEnv, > + RpcResultsToHooksResults, lu.BuildHooksEnv, > lu.PreparePostHookNodes, lu.LogWarning, lu.HTYPE, > cluster_name, master_name, job_id) > diff --git a/lib/mcpu.py b/lib/mcpu.py > index 9be8d46..ee01f25 100644 > --- a/lib/mcpu.py > +++ b/lib/mcpu.py > @@ -688,39 +688,50 @@ class Processor(object): > > self._cbs = cbs > try: > - if self._enable_locks: > - # Acquire the Big Ganeti Lock exclusively if this LU requires it, > - # and in a shared fashion otherwise (to prevent concurrent run > with > - # an exclusive LU. 
> - self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL, > - not lu_class.REQ_BGL, False, calc_timeout()) > - elif lu_class.REQ_BGL: > - raise errors.ProgrammerError("Opcode '%s' requires BGL, but locks > are" > - " disabled" % op.OP_ID) > - > - lu = lu_class(self, op, self.cfg, self.rpc, > - self._wconfdcontext, self.wconfd) > - lu.wconfdlocks = self.wconfd.Client().ListLocks(self._wconfdcontext) > - _CheckSecretParameters(op) > - lu.ExpandNames() > - assert lu.needed_locks is not None, "needed_locks not set by LU" > - > try: > - result = self._LockAndExecLU(lu, locking.LEVEL_CLUSTER + 1, > - calc_timeout) > - finally: > - if self._ec_id: > - self.cfg.DropECReservations(self._ec_id) > - finally: > - self.wconfd.Client().FreeLocksLevel( > - self._wconfdcontext, locking.LEVEL_NAMES[locking.LEVEL_CLUSTER]) > - self._cbs = None > This appears to be a suggestion from git diff and not me: this is becoming a nested mess. Consider adding a: - separate function for the inner logic - a context manager for acquiring the BGL and cleaning it up from WConfD > - > - self._CheckLUResult(op, result) > + if self._enable_locks: > + # Acquire the Big Ganeti Lock exclusively if this LU requires > it, > + # and in a shared fashion otherwise (to prevent concurrent run > with > + # an exclusive LU. 
> + self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL, > + not lu_class.REQ_BGL, False, calc_timeout()) > + elif lu_class.REQ_BGL: > + raise errors.ProgrammerError("Opcode '%s' requires BGL, but > locks are" > + " disabled" % op.OP_ID) > + > + lu = lu_class(self, op, self.cfg, self.rpc, > + self._wconfdcontext, self.wconfd) > + lu.wconfdlocks = > self.wconfd.Client().ListLocks(self._wconfdcontext) > + _CheckSecretParameters(op) > + lu.ExpandNames() > + assert lu.needed_locks is not None, "needed_locks not set by LU" > > - if self._hm is not None: > - self._hm.RunPhase(constants.HOOKS_PHASE_POST, None, True, > - constants.POST_HOOKS_STATUS_SUCCEEDED) > + try: > + result = self._LockAndExecLU(lu, locking.LEVEL_CLUSTER + 1, > + calc_timeout) > + finally: > + if self._ec_id: > + self.cfg.DropECReservations(self._ec_id) > + finally: > + self.wconfd.Client().FreeLocksLevel( > + self._wconfdcontext, locking.LEVEL_NAMES[locking.LEVEL_CLUSTER]) > + self._cbs = None > + > + self._CheckLUResult(op, result) > + > + if self._hm is not None: > + self._hm.RunPhase(constants.HOOKS_PHASE_POST, None, True, > + constants.POST_HOOKS_STATUS_SUCCEEDED) > Should this bit of code be covered by the exception? Hook execution failure != job execution failure, and in the worst case you might run both. > + except: > + # execute global post hooks with the failed status on any exception > + hm = self.hmclass(op.OP_ID, None, ([], > [self.cfg.GetMasterNodeName()]), > + self.rpc.call_hooks_runner, > + hooksmaster.RpcResultsToHooksResults, None, None, > + logging.warning, None, self.cfg.GetClusterName(), > + self.cfg.GetMasterNodeName(), self.GetECId()) > + hm.RunPhase(phase=constants.HOOKS_PHASE_POST, glob=True, > + post_status=constants.POST_HOOKS_STATUS_FAILED) > Also, since we can pass node_names into RunPhase, why not use self._hm with the node_names parameter? Presumably it already has to be present for the PRE global hook to be executed whenever global hooks are executed. 
> + raise > > return result > > -- > 2.6.0.rc2.230.g3dd15c0 > > Hrvoje Ribicic Ganeti Engineering Google Germany GmbH Dienerstr. 12, 80331, München Geschäftsführer: Matthew Scott Sucherman, Paul Terence Manicle Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Diese E-Mail ist vertraulich. Wenn Sie nicht der richtige Adressat sind, leiten Sie diese bitte nicht weiter, informieren Sie den Absender und löschen Sie die E-Mail und alle Anhänge. Vielen Dank. This e-mail is confidential. If you are not the right addressee please do not forward it, please inform the sender, and please erase this e-mail including any attachments. Thanks.
