When we destroy a cm_id, we must purge associated events from the
event queue.  If the cm_id is for a listen request, we also purge
corresponding pending connect requests.  This requires destroying
the cm_ids associated with the connect requests by calling
rdma_destroy_id().  rdma_destroy_id() blocks until all outstanding
callbacks have completed.

The issue is that we hold file->mut while purging events from the
event queue.  We also acquire file->mut in our event handler.
Calling rdma_destroy_id() while holding file->mut can lead to a
deadlock, since the event handler callback cannot acquire file->mut,
which prevents rdma_destroy_id() from completing.

Fix this by moving the events to be purged from the event queue to a
temporary list.  We can then release file->mut and call
rdma_destroy_id() outside of holding any locks.

Bug report by Or Gerlitz <ogerl...@mellanox.com>:

[ INFO: possible circular locking dependency detected ]
3.3.0-rc5-00008-g79f1e43-dirty #34 Tainted: G          I

tgtd/9018 is trying to acquire lock:
 (&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0359a41>] 
rdma_destroy_id+0x33/0x1f0 [rdma_cm]

but task is already holding lock:
 (&file->mut){+.+.+.}, at: [<ffffffffa02470fe>] ucma_free_ctx+0xb6/0x196 
[rdma_ucm]

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (&file->mut){+.+.+.}:
       [<ffffffff810682f3>] lock_acquire+0xf0/0x116
       [<ffffffff8135f179>] mutex_lock_nested+0x64/0x2e6
       [<ffffffffa0247636>] ucma_event_handler+0x148/0x1dc [rdma_ucm]
       [<ffffffffa035a79a>] cma_ib_handler+0x1a7/0x1f7 [rdma_cm]
       [<ffffffffa0333e88>] cm_process_work+0x32/0x119 [ib_cm]
       [<ffffffffa03362ab>] cm_work_handler+0xfb8/0xfe5 [ib_cm]
       [<ffffffff810423e2>] process_one_work+0x2bd/0x4a6
       [<ffffffff810429e2>] worker_thread+0x1d6/0x350
       [<ffffffff810462a6>] kthread+0x84/0x8c
       [<ffffffff81369624>] kernel_thread_helper+0x4/0x10

-> #0 (&id_priv->handler_mutex){+.+.+.}:
       [<ffffffff81067b86>] __lock_acquire+0x10d5/0x1752
       [<ffffffff810682f3>] lock_acquire+0xf0/0x116
       [<ffffffff8135f179>] mutex_lock_nested+0x64/0x2e6
       [<ffffffffa0359a41>] rdma_destroy_id+0x33/0x1f0 [rdma_cm]
       [<ffffffffa024715f>] ucma_free_ctx+0x117/0x196 [rdma_ucm]
       [<ffffffffa0247255>] ucma_close+0x77/0xb4 [rdma_ucm]
       [<ffffffff810df6ef>] fput+0x117/0x1cf
       [<ffffffff810dc76e>] filp_close+0x6d/0x78
       [<ffffffff8102b667>] put_files_struct+0xbd/0x17d
       [<ffffffff8102b76d>] exit_files+0x46/0x4e
       [<ffffffff8102d057>] do_exit+0x299/0x75d
       [<ffffffff8102d599>] do_group_exit+0x7e/0xa9
       [<ffffffff8103ae4b>] get_signal_to_deliver+0x536/0x555
       [<ffffffff81001717>] do_signal+0x39/0x634
       [<ffffffff81001d39>] do_notify_resume+0x27/0x69
       [<ffffffff81361c03>] retint_signal+0x46/0x83

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&file->mut);
                               lock(&id_priv->handler_mutex);
                               lock(&file->mut);
  lock(&id_priv->handler_mutex);

 *** DEADLOCK ***

1 lock held by tgtd/9018:
 #0:  (&file->mut){+.+.+.}, at: [<ffffffffa02470fe>] ucma_free_ctx+0xb6/0x196 
[rdma_ucm]

stack backtrace:
Pid: 9018, comm: tgtd Tainted: G          I  3.3.0-rc5-00008-g79f1e43-dirty #34
Call Trace:
 [<ffffffff81029e9c>] ? console_unlock+0x18e/0x207
 [<ffffffff81066433>] print_circular_bug+0x28e/0x29f
 [<ffffffff81067b86>] __lock_acquire+0x10d5/0x1752
 [<ffffffff810682f3>] lock_acquire+0xf0/0x116
 [<ffffffffa0359a41>] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm]
 [<ffffffff8135f179>] mutex_lock_nested+0x64/0x2e6
 [<ffffffffa0359a41>] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm]
 [<ffffffff8106546d>] ? trace_hardirqs_on_caller+0x11e/0x155
 [<ffffffff810654b1>] ? trace_hardirqs_on+0xd/0xf
 [<ffffffffa0359a41>] rdma_destroy_id+0x33/0x1f0 [rdma_cm]
 [<ffffffffa024715f>] ucma_free_ctx+0x117/0x196 [rdma_ucm]
 [<ffffffffa0247255>] ucma_close+0x77/0xb4 [rdma_ucm]
 [<ffffffff810df6ef>] fput+0x117/0x1cf
 [<ffffffff810dc76e>] filp_close+0x6d/0x78
 [<ffffffff8102b667>] put_files_struct+0xbd/0x17d
 [<ffffffff8102b5cc>] ? put_files_struct+0x22/0x17d
 [<ffffffff8102b76d>] exit_files+0x46/0x4e
 [<ffffffff8102d057>] do_exit+0x299/0x75d
 [<ffffffff8102d599>] do_group_exit+0x7e/0xa9
 [<ffffffff8103ae4b>] get_signal_to_deliver+0x536/0x555
 [<ffffffff810654b1>] ? trace_hardirqs_on+0xd/0xf
 [<ffffffff81001717>] do_signal+0x39/0x634
 [<ffffffff8135e037>] ? printk+0x3c/0x45
 [<ffffffff8106546d>] ? trace_hardirqs_on_caller+0x11e/0x155
 [<ffffffff810654b1>] ? trace_hardirqs_on+0xd/0xf
 [<ffffffff81361803>] ? _raw_spin_unlock_irq+0x2b/0x40
 [<ffffffff81039011>] ? set_current_blocked+0x44/0x49
 [<ffffffff81361bce>] ? retint_signal+0x11/0x83
 [<ffffffff81001d39>] do_notify_resume+0x27/0x69
 [<ffffffff8118a1fe>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff81361c03>] retint_signal+0x46/0x83

Signed-off-by: Sean Hefty <sean.he...@intel.com>
---
This bug has been in the code for quite a while, and this is the first time
that it's been detected.  Given the low likelihood of hitting it, it's
probably safe to wait for 3.4.

 drivers/infiniband/core/ucma.c |   37 ++++++++++++++++++-------------------
 1 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 5034a87..5861cdb 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -449,24 +449,6 @@ static void ucma_cleanup_multicast(struct ucma_context 
*ctx)
        mutex_unlock(&mut);
 }
 
-static void ucma_cleanup_events(struct ucma_context *ctx)
-{
-       struct ucma_event *uevent, *tmp;
-
-       list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
-               if (uevent->ctx != ctx)
-                       continue;
-
-               list_del(&uevent->list);
-
-               /* clear incoming connections. */
-               if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
-                       rdma_destroy_id(uevent->cm_id);
-
-               kfree(uevent);
-       }
-}
-
 static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 {
        struct ucma_event *uevent, *tmp;
@@ -480,9 +462,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast 
*mc)
        }
 }
 
+/*
+ * We cannot hold file->mut when calling rdma_destroy_id() or we can
+ * deadlock.  We also acquire file->mut in ucma_event_handler(), and
+ * rdma_destroy_id() will wait until all callbacks have completed.
+ */
 static int ucma_free_ctx(struct ucma_context *ctx)
 {
        int events_reported;
+       struct ucma_event *uevent, *tmp;
+       LIST_HEAD(list);
 
        /* No new events will be generated after destroying the id. */
        rdma_destroy_id(ctx->cm_id);
@@ -491,10 +480,20 @@ static int ucma_free_ctx(struct ucma_context *ctx)
 
        /* Cleanup events not yet reported to the user. */
        mutex_lock(&ctx->file->mut);
-       ucma_cleanup_events(ctx);
+       list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
+               if (uevent->ctx == ctx)
+                       list_move_tail(&uevent->list, &list);
+       }
        list_del(&ctx->list);
        mutex_unlock(&ctx->file->mut);
 
+       list_for_each_entry_safe(uevent, tmp, &list, list) {
+               list_del(&uevent->list);
+               if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+                       rdma_destroy_id(uevent->cm_id);
+               kfree(uevent);
+       }
+
        events_reported = ctx->events_reported;
        kfree(ctx);
        return events_reported;


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to