On Wed, Sep 16, 2020 at 04:10:46PM +0800, Boqun Feng wrote:
> On Tue, Sep 15, 2020 at 02:32:51PM -0400, Qian Cai wrote:
> > On Fri, 2020-08-07 at 15:42 +0800, Boqun Feng wrote:
> > > Currently, in safe->unsafe detection, lockdep misses the fact that a
> > > LOCK_ENABLED_IRQ_*_READ usage and a LOCK_USED_IN_IRQ_*_READ usage may
> > > cause deadlock too, for example:
> > > 
> > >   P1                          P2
> > >   <irq disabled>
> > >   write_lock(l1);             <irq enabled>
> > >                               read_lock(l2);
> > >   write_lock(l2);
> > >                               <in irq>
> > >                               read_lock(l1);
> > > 
> > > Actually, all of the following cases may cause deadlocks:
> > > 
> > >   LOCK_USED_IN_IRQ_* -> LOCK_ENABLED_IRQ_*
> > >   LOCK_USED_IN_IRQ_*_READ -> LOCK_ENABLED_IRQ_*
> > >   LOCK_USED_IN_IRQ_* -> LOCK_ENABLED_IRQ_*_READ
> > >   LOCK_USED_IN_IRQ_*_READ -> LOCK_ENABLED_IRQ_*_READ
> > > 
> > > To fix this, we need to 1) change the calculation of exclusive_mask() so
> > > that READ bits are not dropped and 2) always call usage() in
> > > mark_lock_irq() to check usage deadlocks, even when the new usage of the
> > > lock is READ.
> > > 
> > > Besides, adjust usage_match() and usage_accumulate() for the recursive
> > > read lock changes.
> > > 
> > > Signed-off-by: Boqun Feng <boqun.f...@gmail.com>
> > 
> > So our daily CI starts to trigger a warning (graph corruption?) below.
> > From the call traces, this recent patchset changed a few related things
> > here and there. Does it ring any bells?
> > 
> > [14828.805563][T145183] lockdep bfs error:-1
> 
> -1 is BFS_EQUEUEFULL, which means we hit the size limitation in lockdep's
> search queue. That is possible since recursive read deadlock detection
> tries to search all edges (dependencies). So maybe we should switch to
> DFS instead of BFS; I will look into this. In the meantime, could you try
> the following to see if it helps with the warnings you got?
> 

Found a way to resolve this while still keeping the BFS. Every time we
want to enqueue a lock_list, we basically enqueue a whole dep list of
entries from the previous lock_list, so we can use a trick here: instead
of enqueueing all the entries, we only enqueue the first one and fetch
the sibling entries with list_next_or_null_rcu(). Patch as below; I also
took the chance to clean up the code and add more comments. I could see
this number (in /proc/lockdep_stats):

        max bfs queue depth:                   201

down to (after applying this patch)

        max bfs queue depth:                   61

with x86_64_defconfig plus the lockdep and selftest configs.
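
In case the trick is easier to see outside of lockdep, here is a minimal,
self-contained userspace sketch of the same idea (all names in it, e.g.
struct node and bfs_find(), are made up for illustration and are not from
lockdep). One simplification: the sketch links siblings directly through a
->next_sibling pointer, while the patch reaches siblings through the
parent's dep list via list_next_or_null_rcu().

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define QUEUE_SIZE 8

struct node {
	int val;
	struct node *first_child;	/* head of this node's child list */
	struct node *next_sibling;	/* siblings share one queue slot  */
	bool visited;
};

static struct node *queue[QUEUE_SIZE];
static unsigned int qhead, qtail;

static bool enqueue(struct node *n)
{
	if ((qtail + 1) % QUEUE_SIZE == qhead)
		return false;			/* queue full */
	queue[qtail] = n;
	qtail = (qtail + 1) % QUEUE_SIZE;
	return true;
}

static struct node *dequeue(void)
{
	struct node *n;

	if (qhead == qtail)
		return NULL;			/* queue empty */
	n = queue[qhead];
	qhead = (qhead + 1) % QUEUE_SIZE;
	return n;
}

/* BFS for the first node whose ->val equals @target; NULL if none. */
static struct node *bfs_find(struct node *root, int target)
{
	struct node *n;

	qhead = qtail = 0;
	if (!enqueue(root))
		return NULL;

	/*
	 * Keep going as long as we either hold a sibling in hand or the
	 * queue still has entries, mirroring "while (lock || (lock = ...))".
	 */
	for (n = NULL; n || (n = dequeue()); n = n->next_sibling) {
		if (n->visited)
			continue;	/* plays the role of 'goto next' */
		n->visited = true;

		if (n->val == target)
			return n;

		/*
		 * Enqueue only the first child; its siblings are reached
		 * through ->next_sibling once it has been dequeued.
		 */
		if (n->first_child && !enqueue(n->first_child))
			return NULL;	/* treat "queue full" as failure */
	}

	return NULL;
}

int main(void)
{
	struct node c2 = { .val = 3 };
	struct node c1 = { .val = 2, .next_sibling = &c2 };
	struct node root = { .val = 1, .first_child = &c1 };
	struct node *found = bfs_find(&root, 3);

	printf("found: %d\n", found ? found->val : -1);	/* "found: 3" */
	return 0;
}

With this scheme at most one child per expanded node sits in the queue at
a time, so the queue depth tracks the number of parents currently being
expanded rather than the total fan-out, which is why the max bfs queue
depth number above drops.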

Qian, could you give it a try?

Regards,
Boqun

---------------------->8
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 454355c033d2..1cc1302bf319 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1640,35 +1640,22 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
                             int offset)
 {
        struct lock_list *entry;
-       struct lock_list *lock;
+       struct lock_list *lock = NULL;
        struct list_head *head;
        struct circular_queue *cq = &lock_cq;
-       enum bfs_result ret = BFS_RNOMATCH;
 
        lockdep_assert_locked();
 
-       if (match(source_entry, data)) {
-               *target_entry = source_entry;
-               ret = BFS_RMATCH;
-               goto exit;
-       }
-
-       head = get_dep_list(source_entry, offset);
-       if (list_empty(head))
-               goto exit;
-
        __cq_init(cq);
        __cq_enqueue(cq, source_entry);
 
-       while ((lock = __cq_dequeue(cq))) {
-               bool prev_only_xr;
-
-               if (!lock->class) {
-                       ret = BFS_EINVALIDNODE;
-                       goto exit;
-               }
+       while (lock || (lock = __cq_dequeue(cq))) {
+               if (!lock->class)
+                       return BFS_EINVALIDNODE;
 
                /*
+                * Step 1: check whether we have already finished on this one.
+                *
                 * If we have visited all the dependencies from this @lock to
                 * others (iow, if we have visited all lock_list entries in
                 * @lock->class->locks_{after,before}) we skip, otherwise go
@@ -1676,17 +1663,17 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
                 * list accessed.
                 */
                if (lock_accessed(lock))
-                       continue;
+                       goto next;
                else
                        mark_lock_accessed(lock);
 
-               head = get_dep_list(lock, offset);
-
-               prev_only_xr = lock->only_xr;
-
-               list_for_each_entry_rcu(entry, head, entry) {
-                       unsigned int cq_depth;
-                       u8 dep = entry->dep;
+               /*
+                * Step 2: check whether the previous dependency and this one
+                *         form a strong dependency path.
+                */
+               if (lock->parent) { /* Parent exists, check prev dependency */
+                       u8 dep = lock->dep;
+                       bool prev_only_xr = lock->parent->only_xr;
 
                        /*
                         * Mask out all -(S*)-> if we only have *R in previous
@@ -1698,29 +1685,68 @@ static enum bfs_result __bfs(struct lock_list *source_entry,
 
                        /* If nothing left, we skip */
                        if (!dep)
-                               continue;
+                               goto next;
 
                        /* If there are only -(*R)-> left, set that for the next step */
-                       entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+                       lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK));
+               }
 
-                       visit_lock_entry(entry, lock);
-                       if (match(entry, data)) {
-                               *target_entry = entry;
-                               ret = BFS_RMATCH;
-                               goto exit;
-                       }
+               /*
+                * Step 3: we haven't visited this and there is a strong
+                *         dependency path to this, so check with @match.
+                */
+               if (match(lock, data)) {
+                       *target_entry = lock;
+                       return BFS_RMATCH;
+               }
+
+               /*
+                * Step 4: if not match, expand the path by adding the
+                *         forward or backward dependencies in the search.
+                *
+                * Note that we only enqueue the first entry of the list into
+                * the queue, because a sibling dependency can always be found
+                * from it (see the 'next' label below), which saves queue space.
+                */
+               head = get_dep_list(lock, offset);
+               entry = list_first_or_null_rcu(head, struct lock_list, entry);
+               if (entry) {
+                       unsigned int cq_depth;
+
+                       if (__cq_enqueue(cq, entry))
+                               return BFS_EQUEUEFULL;
 
-                       if (__cq_enqueue(cq, entry)) {
-                               ret = BFS_EQUEUEFULL;
-                               goto exit;
-                       }
                        cq_depth = __cq_get_elem_count(cq);
                        if (max_bfs_queue_depth < cq_depth)
                                max_bfs_queue_depth = cq_depth;
                }
+
+               /*
+                * Update ->parent, so that when @entry is iterated later, we
+                * know the previous dependency.
+                */
+               list_for_each_entry_rcu(entry, head, entry)
+                       visit_lock_entry(entry, lock);
+next:
+               /*
+                * Step 5: fetch the next dependency to process.
+                *
+                * If there is a previous dependency, we fetch the sibling
+                * dependency from the dep list of the previous dependency.
+                *
+                * Otherwise, set @lock to NULL to fetch the next entry from
+                * the queue.
+                */
+               if (lock->parent) {
+                       head = get_dep_list(lock->parent, offset);
+                       lock = list_next_or_null_rcu(head, &lock->entry,
+                                                    struct lock_list, entry);
+               } else {
+                       lock = NULL;
+               }
        }
-exit:
-       return ret;
+
+       return BFS_RNOMATCH;
 }
 
 static inline enum bfs_result
