On Wed, Jul 08, 2015 at 05:28:44PM +0200, 'Klaus Aehlig' via ganeti-devel wrote:
Our tests running via RunWithLocks strictly depend on no
watcher jobs interfering. Therefore they pause the watcher;
unfortunately, there is still a race: the watcher only checks
the pause status when it is invoked, but submits jobs later
during its run. Therefore, not only pause it (which doesn't hurt),
but also add a filter to reject all its jobs, and then wait
for all running jobs to terminate.
Signed-off-by: Klaus Aehlig <[email protected]>
---
qa/qa_job_utils.py | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/qa/qa_job_utils.py b/qa/qa_job_utils.py
index 23ff206..b0edd6f 100644
--- a/qa/qa_job_utils.py
+++ b/qa/qa_job_utils.py
@@ -365,7 +365,15 @@ def RunWithLocks(fn, locks, timeout, block, *args,
**kwargs):
"acquired in the course of a QA test.")
# The watcher may interfere by issuing its own jobs - therefore pause it
+ # and also reject all its jobs, then wait for any running jobs to finish.
AssertCommand(["gnt-cluster", "watcher", "pause", "12h"])
+ filter_uuid = stdout_of([
+ "gnt-filter", "add",
+ '--predicates=[["reason", ["=", "source", "gnt:watcher"]]]',
+ "--action=REJECT"
+ ])
+ while stdout_of(["gnt-job", "list", "--no-header", "--running"]) != "":
+ time.sleep(1)
# Find out the lock names prior to starting the delay function
lock_name_map = _FindLockNames(locks)
@@ -416,6 +424,7 @@ def RunWithLocks(fn, locks, timeout, block, *args,
**kwargs):
pass
# Revive the watcher
+ AssertCommand(["gnt-filter", "delete", filter_uuid])
AssertCommand(["gnt-cluster", "watcher", "continue"])
--
2.4.3.573.g4eafbef
LGTM, thanks