The new completion/crossrelease annotations interact unfavourably with
the extant flush_work()/flush_workqueue() annotations.

The problem is that when a single work class does:

  wait_for_completion(&C)

and

  complete(&C)

in different executions, we'll build dependencies like:

  lock_map_acquire(W)
  complete_acquire(C)

and

  lock_map_acquire(W)
  complete_release(C)

which results in the dependency chain: W->C->W, which lockdep thinks
spells deadlock, even though there is no deadlock potential since
works are run concurrently.

One possibility would be to change the work 'lock' to recursive-read,
but that would mean hitting a lockdep limitation on recursive locks.
Also, unconditionally switching to recursive-read here would fail to
detect the actual deadlock on single-threaded workqueues, which do
have a problem with this.

For now, forcefully disregard these locks for crossrelease.


Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 include/linux/irqflags.h |    4 +--
 include/linux/lockdep.h  |    8 +++---
 kernel/locking/lockdep.c |   60 +++++++++++++++++++++++++++++------------------
 kernel/workqueue.c       |   23 +++++++++++++++++-
 4 files changed, 66 insertions(+), 29 deletions(-)

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -26,7 +26,7 @@
 # define trace_hardirq_enter()                 \
 do {                                           \
        current->hardirq_context++;             \
-       crossrelease_hist_start(XHLOCK_HARD);   \
+       crossrelease_hist_start(XHLOCK_HARD, 0);\
 } while (0)
 # define trace_hardirq_exit()                  \
 do {                                           \
@@ -36,7 +36,7 @@ do {                                          \
 # define lockdep_softirq_enter()               \
 do {                                           \
        current->softirq_context++;             \
-       crossrelease_hist_start(XHLOCK_SOFT);   \
+       crossrelease_hist_start(XHLOCK_SOFT, 0);\
 } while (0)
 # define lockdep_softirq_exit()                        \
 do {                                           \
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -578,11 +578,11 @@ extern void lock_commit_crosslock(struct
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
        { .name = (_name), .key = (void *)(_key), .cross = 0, }
 
-extern void crossrelease_hist_start(enum xhlock_context_t c);
+extern void crossrelease_hist_start(enum xhlock_context_t c, bool force);
 extern void crossrelease_hist_end(enum xhlock_context_t c);
 extern void lockdep_init_task(struct task_struct *task);
 extern void lockdep_free_task(struct task_struct *task);
-#else
+#else /* !CROSSRELEASE */
 #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
 /*
  * To initialize a lockdep_map statically use this macro.
@@ -591,11 +591,11 @@ extern void lockdep_free_task(struct tas
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
        { .name = (_name), .key = (void *)(_key), }
 
-static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
+static inline void crossrelease_hist_start(enum xhlock_context_t c, bool force) {}
 static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
 static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}
-#endif
+#endif /* CROSSRELEASE */
 
 #ifdef CONFIG_LOCK_STAT
 
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4629,7 +4629,7 @@ asmlinkage __visible void lockdep_sys_ex
         * the index to point to the last entry, which is already invalid.
         */
        crossrelease_hist_end(XHLOCK_PROC);
-       crossrelease_hist_start(XHLOCK_PROC);
+       crossrelease_hist_start(XHLOCK_PROC, false);
 }
 
 void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
@@ -4725,25 +4725,25 @@ static inline void invalidate_xhlock(str
 /*
  * Lock history stacks; we have 3 nested lock history stacks:
  *
- *   Hard IRQ
- *   Soft IRQ
- *   History / Task
- *
- * The thing is that once we complete a (Hard/Soft) IRQ the future task locks
- * should not depend on any of the locks observed while running the IRQ.
- *
- * So what we do is rewind the history buffer and erase all our knowledge of
- * that temporal event.
- */
-
-/*
- * We need this to annotate lock history boundaries. Take for instance
- * workqueues; each work is independent of the last. The completion of a future
- * work does not depend on the completion of a past work (in general).
- * Therefore we must not carry that (lock) dependency across works.
+ *   HARD(IRQ)
+ *   SOFT(IRQ)
+ *   PROC(ess)
+ *
+ * The thing is that once we complete a HARD/SOFT IRQ the future task locks
+ * should not depend on any of the locks observed while running the IRQ.  So
+ * what we do is rewind the history buffer and erase all our knowledge of that
+ * temporal event.
+ *
+ * The PROCess one is special though; it is used to annotate independence
+ * inside a task.
+ *
+ * Take for instance workqueues; each work is independent of the last. The
+ * completion of a future work does not depend on the completion of a past work
+ * (in general). Therefore we must not carry that (lock) dependency across
+ * works.
  *
  * This is true for many things; pretty much all kthreads fall into this
- * pattern, where they have an 'idle' state and future completions do not
+ * pattern, where they have an invariant state and future completions do not
  * depend on past completions. Its just that since they all have the 'same'
  * form -- the kthread does the same over and over -- it doesn't typically
  * matter.
@@ -4751,15 +4751,31 @@ static inline void invalidate_xhlock(str
  * The same is true for system-calls, once a system call is completed (we've
  * returned to userspace) the next system call does not depend on the lock
  * history of the previous system call.
+ *
+ * The key property for independence, this invariant state, is that it must be
+ * a point where we hold no locks and have no history. Because if we were to
+ * hold locks, the restore at _end() would not necessarily recover its history
+ * entry. Similarly, independence per-definition means it does not depend on
+ * prior state.
  */
-void crossrelease_hist_start(enum xhlock_context_t c)
+void crossrelease_hist_start(enum xhlock_context_t c, bool force)
 {
        struct task_struct *cur = current;
 
-       if (cur->xhlocks) {
-               cur->xhlock_idx_hist[c] = cur->xhlock_idx;
-               cur->hist_id_save[c] = cur->hist_id;
+       if (!cur->xhlocks)
+               return;
+
+       /*
+        * We call this at an invariant point, no current state, no history.
+        */
+       if (c == XHLOCK_PROC) {
+               /* verified the former, ensure the latter */
+               WARN_ON_ONCE(!force && cur->lockdep_depth);
+               invalidate_xhlock(&xhlock(cur->xhlock_idx));
        }
+
+       cur->xhlock_idx_hist[c] = cur->xhlock_idx;
+       cur->hist_id_save[c]    = cur->hist_id;
 }
 
 void crossrelease_hist_end(enum xhlock_context_t c)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2093,7 +2093,28 @@ __acquires(&pool->lock)
 
        lock_map_acquire(&pwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
-       crossrelease_hist_start(XHLOCK_PROC);
+       /*
+        * Strictly speaking we should do start(PROC) without holding any
+        * locks, that is, before these two lock_map_acquire()'s.
+        *
+        * However, that would result in:
+        *
+        *   A(W1)
+        *   WFC(C)
+        *              A(W1)
+        *              C(C)
+        *
+        * Which would create W1->C->W1 dependencies, even though there is no
+        * actual deadlock possible. There are two solutions, using a
+        * read-recursive acquire on the work(queue) 'locks', but this will then
+        * hit the lockdep limitation on recursive locks, or simply discard
+        * these locks.
+        *
+        * AFAICT there is no possible deadlock scenario between the
+        * flush_work() and complete() primitives (except for single-threaded
+        * workqueues), so hiding them isn't a problem.
+        */
+       crossrelease_hist_start(XHLOCK_PROC, true);
        trace_workqueue_execute_start(work);
        worker->current_func(work);
        /*


Reply via email to