On Mon, Mar 3, 2014 at 4:05 PM, Jose A. Lopes <[email protected]> wrote: > This patch changes the watcher to check whether an instance that is > down is also locked by some LU before attempting to restart the > instance. Without checking the lock status, the watcher could think > that an instance that is being failed over is actually down, for > example. > > This problem occurs because there is a significant time window between > 'xm stop' and 'xm destroy' during which an instance is reported as > being down, but the watcher should not act during this period. > > This fixes issue 734. > This introduces issue 743. > > Unfortunately, this fix introduces a race condition given at the > moment it is not possible to query the instance status and the lock > status simultaneously. It won't be possible to fix this race > condition until after the locks have been migrated completely to > Haskell and the cluster configuration can be functionally updated in > Haskell, which will also allow the simultaneous queries.
How bad is this? Just a theoretical possibility or a race condition that could actually show up more or less frequently? > > Signed-off-by: Jose A. Lopes <[email protected]> > --- > lib/watcher/__init__.py | 33 +++++++++++++++++++++++++++------ > 1 file changed, 27 insertions(+), 6 deletions(-) > > diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py > index a56ee4e..5abd282 100644 > --- a/lib/watcher/__init__.py > +++ b/lib/watcher/__init__.py > @@ -173,9 +173,20 @@ class Node: > self.secondaries = secondaries > > > -def _CleanupInstance(cl, notepad, inst): > +def _CleanupInstance(cl, notepad, inst, locks): > n = notepad.NumberOfCleanupAttempts(inst.name) > > + lock = None > + for (name, lock_status) in locks: > + if name == inst.name: > + lock = lock_status > + break I'm not entirely convinced by this. The number of instances can be big, and each of them might have a few locks. Therefore, the list of tuples (name, lock_status) can be quite long, and iterating over it seems to be not really optimal. Wouldn't it be better to have the locks structure as a dictionary where the key is the name of the instance? Also, if I'm not mistaken we don't really care about which locks are taken, but only whether the instance is locked, so this would make the required data structure even easier and smaller: {name: bool_is_locked, ...} What do you think? > + > + if lock is not None: > + logging.info("Not cleaning up instance '%s', instance is locked", > + inst.name) > + return > + > if n > MAXTRIES: > logging.warning("Not cleaning up instance '%s', retries exhausted", > inst.name) > @@ -194,7 +205,7 @@ def _CleanupInstance(cl, notepad, inst): > notepad.RecordCleanupAttempt(inst.name) > > > -def _CheckInstances(cl, notepad, instances): > +def _CheckInstances(cl, notepad, instances, locks): > """Make a pass over the list of instances, restarting downed ones. > > """ > @@ -204,7 +215,7 @@ def _CheckInstances(cl, notepad, instances): > > for inst in instances.values(): > if inst.status == constants.INSTST_USERDOWN: > - _CleanupInstance(cl, notepad, inst) > + _CleanupInstance(cl, notepad, inst, locks) > elif inst.status in BAD_STATES: > n = notepad.NumberOfRestartAttempts(inst.name) > > @@ -648,6 +659,15 @@ def _GetGroupData(qcl, uuid): > """Retrieves instances and nodes per node group. > > """ > + locks = qcl.Query(constants.QR_LOCK, ["name", "mode"], None) > + > + prefix = "instance/" > + prefix_len = len(prefix) > + > + locks = [(name[prefix_len:], lock) > + for [[_, name], [_, lock]] in locks.data > + if name.startswith(prefix)] > + > queries = [ > (constants.QR_INSTANCE, > ["name", "status", "disks_active", "snodes", > @@ -693,7 +713,8 @@ def _GetGroupData(qcl, uuid): > for (name, bootid, offline) in raw_nodes] > > return (dict((node.name, node) for node in nodes), > - dict((inst.name, inst) for inst in instances)) > + dict((inst.name, inst) for inst in instances), > + locks) > > > def _LoadKnownGroups(): > @@ -751,7 +772,7 @@ def _GroupWatcher(opts): > > _CheckMaster(client) > > - (nodes, instances) = _GetGroupData(query_client, group_uuid) > + (nodes, instances, locks) = _GetGroupData(query_client, group_uuid) > > # Update per-group instance status file > _UpdateInstanceStatus(inst_status_path, instances.values()) > @@ -760,7 +781,7 @@ def _GroupWatcher(opts): > pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE, > known_groups) > > - started = _CheckInstances(client, notepad, instances) > + started = _CheckInstances(client, notepad, instances, locks) > _CheckDisks(client, notepad, nodes, instances, started) > _VerifyDisks(client, group_uuid, nodes, instances) > except Exception, err: > -- > 1.9.0.279.gdc9e3eb > Thanks, Michele -- Google Germany GmbH Dienerstr. 12 80331 München Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Geschäftsführer: Graham Law, Christine Elizabeth Flores
