On Fri, 2014-12-26 at 14:45 +0800, Li Bin wrote:
> On 2014/7/8 4:05, Peter Zijlstra wrote:
> > On Mon, Jul 07, 2014 at 09:55:43AM -0400, Sasha Levin wrote:
> >> I've also had this one, which looks similar:
> >>
> >> [10375.005884] BUG: spinlock recursion on CPU#0, modprobe/10965
> >> [10375.006573]  lock: 0xffff8803a0fd7740, .magic: dead4ead, .owner: 
> >> modprobe/10965, .owner_cpu: 15
> >> [10375.007412] CPU: 0 PID: 10965 Comm: modprobe Tainted: G        W      
> >> 3.16.0-rc3-next-20140704-sasha-00023-g26c0906-dirty #765
> > 
> > Something's fucked; so we have:
> > 
> > debug_spin_lock_before()
> >     SPIN_BUG_ON(lock->owner == current, "recursion");
> > 
> 
> Hello,
> Can ACCESS_ONCE() help with this issue? I have no evidence that its absence is
> responsible for the issue, but I think it is indeed needed here. Is that right?
> 
> SPIN_BUG_ON(ACCESS_ONCE(lock->owner) == current, "recursion");

Hmm, I guess on a contended spinlock there's a chance that lock->owner
can change, if the contended lock is acquired, right between the 'cond'
check and spin_dump(), which would explain the bogus ->owner related
messages. Of course the same applies to ->owner_cpu. Your ACCESS_ONCE,
however, doesn't really change anything since we still read ->owner
again in spin_dump(). How about something like this (untested)?

diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a59..86c199a 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -75,15 +75,58 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
        spin_dump(lock, msg);
 }
 
+static void spin_dump_owner(raw_spinlock_t *lock, struct task_struct *owner, 
+                           int owner_cpu, const char *msg)
+{
+       printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
+              msg, raw_smp_processor_id(),
+              current->comm, task_pid_nr(current));
+       printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
+              ".owner_cpu: %d\n", lock, lock->magic, owner->comm,
+              task_pid_nr(owner), owner_cpu);
+
+       dump_stack();
+}
+
+static void spin_bug_owner_lock(raw_spinlock_t *lock)
+{
+       int owner_cpu;
+       struct task_struct *owner;
+
+       if (!debug_locks_off())
+               return;
+
+       owner = ACCESS_ONCE(lock->owner);
+       owner_cpu = ACCESS_ONCE(lock->owner_cpu);
+       if (owner == current)
+               spin_dump_owner(lock, owner, owner_cpu, "recursion");
+       if (owner_cpu == raw_smp_processor_id())
+               spin_dump_owner(lock, owner, owner_cpu, "cpu recursion");
+}
+
+static void spin_bug_owner_unlock(raw_spinlock_t *lock)
+{
+       int owner_cpu;
+       struct task_struct *owner;
+
+       if (!debug_locks_off())
+               return;
+
+       owner = ACCESS_ONCE(lock->owner);
+       owner_cpu = ACCESS_ONCE(lock->owner_cpu);
+       if (owner != current)
+               spin_dump_owner(lock, owner, owner_cpu, "wrong owner");
+       if (owner_cpu != raw_smp_processor_id())
+               spin_dump_owner(lock, owner, owner_cpu, "wrong CPU");
+}
+
 #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
 
 static inline void
 debug_spin_lock_before(raw_spinlock_t *lock)
 {
        SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-       SPIN_BUG_ON(lock->owner == current, lock, "recursion");
-       SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
-                                                       lock, "cpu recursion");
+       spin_bug_owner_lock(lock);
 }
 
 static inline void debug_spin_lock_after(raw_spinlock_t *lock)
@@ -96,9 +139,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
 {
        SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
        SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
-       SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
-       SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
-                                                       lock, "wrong CPU");
+       spin_bug_owner_unlock(lock);
+
        lock->owner = SPINLOCK_OWNER_INIT;
        lock->owner_cpu = -1;
 }


Thanks,
Davidlohr

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to