On Mon, Mar 3, 2014 at 4:52 PM, Jose A. Lopes <[email protected]> wrote: > Interdiff: > > diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py > index 5abd282..7131a95 100644 > --- a/lib/watcher/__init__.py > +++ b/lib/watcher/__init__.py > @@ -176,13 +176,7 @@ class Node: > def _CleanupInstance(cl, notepad, inst, locks): > n = notepad.NumberOfCleanupAttempts(inst.name) > > - lock = None > - for (name, lock_status) in locks: > - if name == inst.name: > - lock = lock_status > - break > - > - if lock is not None: > + if inst.name in locks: > logging.info("Not cleaning up instance '%s', instance is locked", > inst.name) > return > @@ -664,9 +658,11 @@ def _GetGroupData(qcl, uuid): > prefix = "instance/" > prefix_len = len(prefix) > > - locks = [(name[prefix_len:], lock) > - for [[_, name], [_, lock]] in locks.data > - if name.startswith(prefix)] > + locked_instances = set() > + > + for [[_, name], [_, lock]] in locks.data: > + if name.startswith(prefix) and lock: > + locked_instances.add(name[prefix_len:]) > > queries = [ > (constants.QR_INSTANCE, > @@ -714,7 +710,7 @@ def _GetGroupData(qcl, uuid): > > return (dict((node.name, node) for node in nodes), > dict((inst.name, inst) for inst in instances), > - locks) > + locked_instances) > > > def _LoadKnownGroups(): > > > On Mar 03 16:40, Michele Tartara wrote: >> On Mon, Mar 3, 2014 at 4:36 PM, Jose A. Lopes <[email protected]> wrote: >> > On Mar 03 16:30, Michele Tartara wrote: >> >> On Mon, Mar 3, 2014 at 4:05 PM, Jose A. Lopes <[email protected]> >> >> wrote: >> >> > This patch changes the watcher to check whether an instance that is >> >> > down is also locked by some LU before attempting to restart the >> >> > instance. Without checking the lock status, the watcher could think >> >> > that an instance that is being failed over is actually down, for >> >> > example. >> >> > >> >> > This problem occurs because there is a significant time window between >> >> > 'xm stop' and 'xm destroy' during which an instance is reported as >> >> > being down, but the watcher should not act during this period. >> >> > >> >> > This fixes issue 734. >> >> > This introduces issue 743. >> >> > >> >> > Unfortunately, this fix introduces a race condition given at the >> >> > moment it is not possible to query the instance status and the lock >> >> > status simultaneously. It won't be possible to fix this race >> >> > condition until after the locks have been migrated completely to >> >> > Haskell and the cluster configuration can be functionally updated in >> >> > Haskell, which will also allow the simultaneous queries. >> >> >> >> How bad is this? Just a theoretical possibility or a race condition >> >> that could actually show up more or less frequently? >> > >> > So the race condition occurs in the following situation: >> > >> > 1. the lock is free >> > 2. the watcher queries the lock and sees it is free >> > 3. some lu starts and acquires the lock >> > 4. the lu stops the instance >> > 5. the watcher queries the instance status and sees it is down >> > 6. the watcher sends an opcode to clean the instance (because the lock >> > is free and the instance is down) >> > >> > This is very unlikely because two queries (lock + status) are much >> > faster than acquiring the lock and shutting down an instance. >> >> Ok, and given that it cannot really be solved until we complete the >> migration, and that you already filed a bug for it, we can deal with >> it for now. >> >> > >> >> > >> >> > Signed-off-by: Jose A. Lopes <[email protected]> >> >> > --- >> >> > lib/watcher/__init__.py | 33 +++++++++++++++++++++++++++------ >> >> > 1 file changed, 27 insertions(+), 6 deletions(-) >> >> > >> >> > diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py >> >> > index a56ee4e..5abd282 100644 >> >> > --- a/lib/watcher/__init__.py >> >> > +++ b/lib/watcher/__init__.py >> >> > @@ -173,9 +173,20 @@ class Node: >> >> > self.secondaries = secondaries >> >> > >> >> > >> >> > -def _CleanupInstance(cl, notepad, inst): >> >> > +def _CleanupInstance(cl, notepad, inst, locks): >> >> > n = notepad.NumberOfCleanupAttempts(inst.name) >> >> > >> >> > + lock = None >> >> > + for (name, lock_status) in locks: >> >> > + if name == inst.name: >> >> > + lock = lock_status >> >> > + break >> >> >> >> I'm not entirely convinced by this. The number of instances can be >> >> big, and each of them might have a few locks. Therefore, the list of >> >> tuples (name, lock_status) can be quite long, and iterating over it >> >> seems to be not really optimal. >> >> >> >> Wouldn't it be better to have the locks structure as a dictionary >> >> where the key is the name of the instance? >> >> >> >> Also, if I'm not mistaken we don't really care about which locks are >> >> taken, but only whether the instance is locked, so this would make the >> >> required data structure even easier and smaller: {name: >> >> bool_is_locked, ...} >> >> >> >> What do you think? >> > >> > Sure! We can use a set instead. >> > >> > Thanks, >> > Jose >> > >> >> > + >> >> > + if lock is not None: >> >> > + logging.info("Not cleaning up instance '%s', instance is locked", >> >> > + inst.name) >> >> > + return >> >> > + >> >> > if n > MAXTRIES: >> >> > logging.warning("Not cleaning up instance '%s', retries exhausted", >> >> > inst.name) >> >> > @@ -194,7 +205,7 @@ def _CleanupInstance(cl, notepad, inst): >> >> > notepad.RecordCleanupAttempt(inst.name) >> >> > >> >> > >> >> > -def _CheckInstances(cl, notepad, instances): >> >> > +def _CheckInstances(cl, notepad, instances, locks): >> >> > """Make a pass over the list of instances, restarting downed ones. >> >> > >> >> > """ >> >> > @@ -204,7 +215,7 @@ def _CheckInstances(cl, notepad, instances): >> >> > >> >> > for inst in instances.values(): >> >> > if inst.status == constants.INSTST_USERDOWN: >> >> > - _CleanupInstance(cl, notepad, inst) >> >> > + _CleanupInstance(cl, notepad, inst, locks) >> >> > elif inst.status in BAD_STATES: >> >> > n = notepad.NumberOfRestartAttempts(inst.name) >> >> > >> >> > @@ -648,6 +659,15 @@ def _GetGroupData(qcl, uuid): >> >> > """Retrieves instances and nodes per node group. >> >> > >> >> > """ >> >> > + locks = qcl.Query(constants.QR_LOCK, ["name", "mode"], None) >> >> > + >> >> > + prefix = "instance/" >> >> > + prefix_len = len(prefix) >> >> > + >> >> > + locks = [(name[prefix_len:], lock) >> >> > + for [[_, name], [_, lock]] in locks.data >> >> > + if name.startswith(prefix)] >> >> > + >> >> > queries = [ >> >> > (constants.QR_INSTANCE, >> >> > ["name", "status", "disks_active", "snodes", >> >> > @@ -693,7 +713,8 @@ def _GetGroupData(qcl, uuid): >> >> > for (name, bootid, offline) in raw_nodes] >> >> > >> >> > return (dict((node.name, node) for node in nodes), >> >> > - dict((inst.name, inst) for inst in instances)) >> >> > + dict((inst.name, inst) for inst in instances), >> >> > + locks) >> >> > >> >> > >> >> > def _LoadKnownGroups(): >> >> > @@ -751,7 +772,7 @@ def _GroupWatcher(opts): >> >> > >> >> > _CheckMaster(client) >> >> > >> >> > - (nodes, instances) = _GetGroupData(query_client, group_uuid) >> >> > + (nodes, instances, locks) = _GetGroupData(query_client, group_uuid) >> >> > >> >> > # Update per-group instance status file >> >> > _UpdateInstanceStatus(inst_status_path, instances.values()) >> >> > @@ -760,7 +781,7 @@ def _GroupWatcher(opts): >> >> > pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE, >> >> > known_groups) >> >> > >> >> > - started = _CheckInstances(client, notepad, instances) >> >> > + started = _CheckInstances(client, notepad, instances, locks) >> >> > _CheckDisks(client, notepad, nodes, instances, started) >> >> > _VerifyDisks(client, group_uuid, nodes, instances) >> >> > except Exception, err: >> >> > -- >> >> > 1.9.0.279.gdc9e3eb >> >> > >> >> >> >> Thanks, >> >> Michele >> >> >> >> -- >> >> Google Germany GmbH >> >> Dienerstr. 12 >> >> 80331 München >> >> >> >> Registergericht und -nummer: Hamburg, HRB 86891 >> >> Sitz der Gesellschaft: Hamburg >> >> Geschäftsführer: Graham Law, Christine Elizabeth Flores >> > >> > -- >> > Jose Antonio Lopes >> > Ganeti Engineering >> > Google Germany GmbH >> > Dienerstr. 12, 80331, München >> > >> > Registergericht und -nummer: Hamburg, HRB 86891 >> > Sitz der Gesellschaft: Hamburg >> > Geschäftsführer: Graham Law, Christine Elizabeth Flores >> > Steuernummer: 48/725/00206 >> > Umsatzsteueridentifikationsnummer: DE813741370 >> >> >> Thanks, >> Michele
Even better this way! LGTM, thanks. Michele -- Google Germany GmbH Dienerstr. 12 80331 München Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Geschäftsführer: Graham Law, Christine Elizabeth Flores
