Tests run via RunWithLocks strictly depend on no watcher jobs
interfering, so they pause the watcher. Unfortunately, there is
still a race: the watcher only checks the pause status when it
is invoked, but submits its jobs later during its run. Therefore,
in addition to pausing the watcher (which doesn't hurt), add a
filter that rejects all of its jobs, and then wait for all
running jobs to terminate.

Signed-off-by: Klaus Aehlig <[email protected]>
---
 qa/qa_job_utils.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/qa/qa_job_utils.py b/qa/qa_job_utils.py
index 23ff206..b0edd6f 100644
--- a/qa/qa_job_utils.py
+++ b/qa/qa_job_utils.py
@@ -365,7 +365,15 @@ def RunWithLocks(fn, locks, timeout, block, *args, **kwargs):
                          "acquired in the course of a QA test.")
 
   # The watcher may interfere by issuing its own jobs - therefore pause it
+  # and reject all its jobs, then wait for any running jobs to finish.
   AssertCommand(["gnt-cluster", "watcher", "pause", "12h"])
+  filter_uuid = stdout_of([
+    "gnt-filter", "add",
+    '--predicates=[["reason", ["=", "source", "gnt:watcher"]]]',
+    "--action=REJECT"
+  ])
+  while stdout_of(["gnt-job", "list", "--no-header", "--running"]) != "":
+    time.sleep(1)
 
   # Find out the lock names prior to starting the delay function
   lock_name_map = _FindLockNames(locks)
@@ -416,6 +424,7 @@ def RunWithLocks(fn, locks, timeout, block, *args, **kwargs):
     pass
 
   # Revive the watcher
+  AssertCommand(["gnt-filter", "delete", filter_uuid])
   AssertCommand(["gnt-cluster", "watcher", "continue"])
 
 
-- 
2.4.3.573.g4eafbef

Reply via email to