This fixes bug #1114 in bugzilla, which is a deadlock between ipoib_stop and mcast_join_task.

ipoib_stop is called with rtnl_lock held, and flushes ipoib_workqueue.
The flush operation might wait for mcast_join_task to finish, which
in turn might be waiting for rtnl_lock.

Signed-off-by: Yossi Etigin <[EMAIL PROTECTED]>

--

Index: b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c    2008-08-04 18:09:33.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c    2008-08-04 18:39:08.000000000 +0300
@@ -504,6 +504,7 @@
        struct ipoib_dev_priv *priv =
                container_of(work, struct ipoib_dev_priv, mcast_join_task.work);
        struct net_device *dev = priv->dev;
+       int ret;

        if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
                return;
@@ -577,9 +578,16 @@
        priv->mcast_mtu = 
IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));

        if (!ipoib_cm_admin_enabled(dev)) {
-               rtnl_lock();
-               dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-               rtnl_unlock();
+               /* Avoid deadlock with ipoib_stop */
+               while (!(ret = rtnl_trylock()) &&
+                      test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+                       yield();
+
+               if (ret) {
+                       dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+                       rtnl_unlock();
+               } else
+                       ipoib_dbg_mcast(priv, "ignoring mtu setup because device is 
down\n");
        }

        ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

--
--Yossi

_______________________________________________
ewg mailing list
ewg@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg

Reply via email to