Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-24 Thread Shirley Ma

The reason to add this piece was for
the next __path_free() patch. 
Caller can't hold priv spin_lock to
call __path_free() since it's has ipoib_put_ah() inside. I moved the spin_lock
inside of __path_free(), and changed __path_free() to path_free().  And
also the original code does't hold priv spin_lock calling __path_free()
when bringing the interface down.

Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-24 Thread Roland Dreier
Thanks, applied except for the chunk below, since I didn't see any
reason to reorder things like this.  It doesn't move the freeing out
of a locked region or fix anything as far as I can tell.

 >  @@ -220,18 +220,23 @@ static void __path_free(struct net_devic
 >  struct ipoib_dev_priv *priv = netdev_priv(dev);
 >  struct ipoib_neigh *neigh, *tn;
 >  struct sk_buff *skb;
 >  +   LIST_HEAD(ah_list);
 >  +   struct ipoib_ah *ah, *tah;
 >   
 >  while ((skb = __skb_dequeue(&path->queue)))
 >  dev_kfree_skb_irq(skb);
 >   
 >  list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
 >  if (neigh->ah)
 >  -   ipoib_put_ah(neigh->ah);
 >  +   list_add_tail(&neigh->ah->list, &ah_list);
 >  *to_ipoib_neigh(neigh->neighbour) = NULL;
 >  neigh->neighbour->ops->destructor = NULL;
 >  kfree(neigh);
 >  }
 >   
 >  +   list_for_each_entry_safe(ah, tah, &ah_list, list)
 >  +   ipoib_put_ah(ah);
 >  +
 >  if (path->ah)
 >  ipoib_put_ah(path->ah);
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-24 Thread Shirley Ma

Here is the patch against the most recent
bit. Please review it. 

Signed-off-by: Shirley Ma <[EMAIL PROTECTED]> 


diff -urpN infiniband/ulp/ipoib/ipoib_main.c
infiniband-spinlock/ulp/ipoib/ipoib_main.c
--- infiniband/ulp/ipoib/ipoib_main.c        2005-02-24
18:06:15.0 +
+++ infiniband-spinlock/ulp/ipoib/ipoib_main.c        2005-02-24
18:12:13.0 +
@@ -220,18 +220,23 @@ static void __path_free(struct net_devic
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh, *tn;
         struct sk_buff *skb;
+        LIST_HEAD(ah_list);
+        struct ipoib_ah *ah, *tah;
 
         while ((skb = __skb_dequeue(&path->queue)))
                 dev_kfree_skb_irq(skb);
 
         list_for_each_entry_safe(neigh, tn,
&path->neigh_list, list) {
                 if
(neigh->ah)
-                
       ipoib_put_ah(neigh->ah);
+                
       list_add_tail(&neigh->ah->list,
&ah_list);
                 *to_ipoib_neigh(neigh->neighbour)
= NULL;
                 neigh->neighbour->ops->destructor
= NULL;
                 kfree(neigh);
         }
 
+        list_for_each_entry_safe(ah, tah,
&ah_list, list)
+                ipoib_put_ah(ah);
+
         if (path->ah)
                 ipoib_put_ah(path->ah);
 
@@ -660,6 +665,7 @@ static void ipoib_neigh_destructor(struc
         struct ipoib_neigh *neigh = *to_ipoib_neigh(n);
         struct
ipoib_dev_priv *priv = netdev_priv(n->dev);
         unsigned long flags;
+        struct ipoib_ah *ah = NULL;
 
         ipoib_dbg(priv,
                  
"neigh_destructor for %06x " IPOIB_GID_FMT "\n",
@@ -670,13 +676,16 @@ static void ipoib_neigh_destructor(struc
 
         if (neigh) {
                 if
(neigh->ah)
-                
       ipoib_put_ah(neigh->ah);
+                
       ah = neigh->ah;
                 list_del(&neigh->list);
                 *to_ipoib_neigh(n)
= NULL;
                 kfree(neigh);
         }
 
         spin_unlock_irqrestore(&priv->lock,
flags);
+        
+        if (ah)
+                ipoib_put_ah(ah);
 }
 
 static int ipoib_neigh_setup(struct neighbour *neigh)
diff -urpN infiniband/ulp/ipoib/ipoib_multicast.c infiniband-spinlock/ulp/ipoib/ipoib_multicast.c
--- infiniband/ulp/ipoib/ipoib_multicast.c        2005-02-24
18:06:15.0 +
+++ infiniband-spinlock/ulp/ipoib/ipoib_multicast.c      
 2005-02-24 18:12:13.0 +
@@ -93,6 +93,8 @@ static void ipoib_mcast_free(struct ipoi
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh, *tmp;
         unsigned long flags;
+        LIST_HEAD(ah_list);
+        struct ipoib_ah *ah, *tah;
 
         ipoib_dbg_mcast(netdev_priv(dev),
                  
      "deleting multicast group " IPOIB_GID_FMT
"\n",
@@ -101,7 +103,8 @@ static void ipoib_mcast_free(struct ipoi
         spin_lock_irqsave(&priv->lock,
flags);
 
         list_for_each_entry_safe(neigh, tmp,
&mcast->neigh_list, list) {
-                ipoib_put_ah(neigh->ah);
+                if
(neigh->ah)
+                
       list_add_tail(&neigh->ah->list,
&ah_list);
                 *to_ipoib_neigh(neigh->neighbour)
= NULL;
                 neigh->neighbour->ops->destructor
= NULL;
                 kfree(neigh);
@@ -109,6 +112,9 @@ static void ipoib_mcast_free(struct ipoi
 
         spin_unlock_irqrestore(&priv->lock,
flags);
 
+        list_for_each_entry_safe(ah, tah,
&ah_list, list)
+                ipoib_put_ah(ah);
+
         if (mcast->ah)
                 ipoib_put_ah(mcast->ah);
 



Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638



infiniband-spinlock.patch
Description: Binary data
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-17 Thread Shirley Ma

Sorry, made a mistake in the previous
patch. Here is the right one.


diff -urN infiniband/ulp/ipoib/ipoib_main.c
infiniband-lock/ulp/ipoib/ipoib_main.c
--- infiniband/ulp/ipoib/ipoib_main.c        2005-02-17
17:24:56.0 +
+++ infiniband-lock/ulp/ipoib/ipoib_main.c        2005-02-18
00:20:10.0 +
@@ -220,18 +220,23 @@
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh, *tn;
         struct sk_buff *skb;
+        LIST_HEAD(ah_list);
+        struct ipoib_ah *ah, *tah;
 
         while ((skb = __skb_dequeue(&path->queue)))
                 dev_kfree_skb_irq(skb);
 
         list_for_each_entry_safe(neigh, tn,
&path->neigh_list, list) {
                 if
(neigh->ah)
-                
       ipoib_put_ah(neigh->ah);
+                
       list_add_tail(&neigh->ah->list,
&ah_list);
                 *to_ipoib_neigh(neigh->neighbour)
= NULL;
                 neigh->neighbour->ops->destructor
= NULL;
                 kfree(neigh);
         }
 
+        list_for_each_entry_safe(ah, tah,
&ah_list, list)
+                ipoib_put_ah(ah);
+
         if (path->ah)
                 ipoib_put_ah(path->ah);
 
@@ -660,6 +665,7 @@
         struct ipoib_neigh *neigh = *to_ipoib_neigh(n);
         struct ipoib_dev_priv *priv = netdev_priv(n->dev);
         unsigned long flags;
+        struct
ipoib_ah *ah = NULL;
 
         ipoib_dbg(priv,
                  
"neigh_destructor for %06x " IPOIB_GID_FMT "\n",
@@ -670,13 +676,16 @@
 
         if (neigh) {
                 if
(neigh->ah)
-                
       ipoib_put_ah(neigh->ah);
+                
       ah = neigh->ah;
                 list_del(&neigh->list);
                 *to_ipoib_neigh(n)
= NULL;
                 kfree(neigh);
         }
 
         spin_unlock_irqrestore(&priv->lock,
flags);
+        
+        if (ah)
+                ipoib_put_ah(ah);
 }
 
 static int ipoib_neigh_setup(struct neighbour *neigh)
diff -urN infiniband/ulp/ipoib/ipoib_multicast.c infiniband-lock/ulp/ipoib/ipoib_multicast.c
--- infiniband/ulp/ipoib/ipoib_multicast.c        2005-02-17
17:24:56.0 +
+++ infiniband-lock/ulp/ipoib/ipoib_multicast.c        2005-02-18
00:20:34.0 +
@@ -93,6 +93,8 @@
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh, *tmp;
         unsigned long flags;
+        LIST_HEAD(ah_list);
+        struct ipoib_ah *ah, *tah;
 
         ipoib_dbg_mcast(netdev_priv(dev),
                  
      "deleting multicast group " IPOIB_GID_FMT
"\n",
@@ -101,7 +103,8 @@
         spin_lock_irqsave(&priv->lock,
flags);
 
         list_for_each_entry_safe(neigh, tmp,
&mcast->neigh_list, list) {
-        
       ipoib_put_ah(neigh->ah);
+                if
(neigh->ah)
+                
       list_add_tail(&neigh->ah->list,
&ah_list);
                 *to_ipoib_neigh(neigh->neighbour)
= NULL;
                 neigh->neighbour->ops->destructor
= NULL;
                 kfree(neigh);
@@ -109,6 +112,9 @@
 
         spin_unlock_irqrestore(&priv->lock,
flags);
 
+        list_for_each_entry_safe(ah, tah,
&ah_list, list)
+                ipoib_put_ah(ah);
+
         if (mcast->ah)
                 ipoib_put_ah(mcast->ah);
 





Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638



infiniband-lock.patch
Description: Binary data
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-17 Thread Shirley Ma

Hi, Roland,

I've created a new patch. Please review
this one.


diff -urN infiniband/ulp/ipoib/ipoib_main.c
infiniband-lock/ulp/ipoib/ipoib_main.c
--- infiniband/ulp/ipoib/ipoib_main.c        2005-02-17
17:24:56.0 +
+++ infiniband-lock/ulp/ipoib/ipoib_main.c        2005-02-17
17:46:43.0 +
@@ -220,18 +220,23 @@
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh, *tn;
         struct sk_buff *skb;
+        LIST_HEAD(ah_list);
+        struct ipoib_ah *ah, *tah;
 
         while ((skb = __skb_dequeue(&path->queue)))
                 dev_kfree_skb_irq(skb);
 
         list_for_each_entry_safe(neigh, tn,
&path->neigh_list, list) {
                 if
(neigh->ah)
-                
       ipoib_put_ah(neigh->ah);
+                
       list_add_tail(&ah_list, &neigh->ah->list);
       
                 *to_ipoib_neigh(neigh->neighbour)
= NULL;
                 neigh->neighbour->ops->destructor
= NULL;
                 kfree(neigh);
         }
 
+        list_for_each_entry_safe(ah, tah,
&ah_list, list)
+                ipoib_put_ah(ah);
+
         if (path->ah)
                 ipoib_put_ah(path->ah);
 
@@ -660,6 +665,7 @@
         struct ipoib_neigh *neigh = *to_ipoib_neigh(n);
         struct ipoib_dev_priv *priv = netdev_priv(n->dev);
         unsigned long flags;
+        struct
ipoib_ah *ah = NULL;
 
         ipoib_dbg(priv,
                  
"neigh_destructor for %06x " IPOIB_GID_FMT "\n",
@@ -670,13 +676,16 @@
 
         if (neigh) {
                 if
(neigh->ah)
-                
       ipoib_put_ah(neigh->ah);
+                
       ah = neigh->ah;
                 list_del(&neigh->list);
                 *to_ipoib_neigh(n)
= NULL;
                 kfree(neigh);
         }
 
         spin_unlock_irqrestore(&priv->lock,
flags);
+        
+        if (ah)
+                ipoib_put_ah(ah);
 }
 
 static int ipoib_neigh_setup(struct neighbour *neigh)
diff -urN infiniband/ulp/ipoib/ipoib_multicast.c infiniband-lock/ulp/ipoib/ipoib_multicast.c
--- infiniband/ulp/ipoib/ipoib_multicast.c        2005-02-17
17:24:56.0 +
+++ infiniband-lock/ulp/ipoib/ipoib_multicast.c        2005-02-17
17:37:03.0 +
@@ -93,6 +93,8 @@
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh, *tmp;
         unsigned long flags;
+        LIST_HEAD(ah_list);
+        struct ipoib_ah *ah, *tah;
 
         ipoib_dbg_mcast(netdev_priv(dev),
                  
      "deleting multicast group " IPOIB_GID_FMT
"\n",
@@ -101,7 +103,8 @@
         spin_lock_irqsave(&priv->lock,
flags);
 
         list_for_each_entry_safe(neigh, tmp,
&mcast->neigh_list, list) {
-        
       ipoib_put_ah(neigh->ah);
+                if
(neigh->ah)
+                
       list_add_tail(&ah_list, &neigh->ah->list);
                 *to_ipoib_neigh(neigh->neighbour)
= NULL;
                 neigh->neighbour->ops->destructor
= NULL;
                 kfree(neigh);
@@ -109,6 +112,9 @@
 
         spin_unlock_irqrestore(&priv->lock,
flags);
 
+        list_for_each_entry_safe(ah, tah,
&ah_list, list)
+                ipoib_put_ah(ah);
+
         if (mcast->ah)
                 ipoib_put_ah(mcast->ah);

 

Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638



infiniband-lock.patch
Description: Binary data
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-15 Thread Shirley Ma

>I think the proper fix for those places calling
ipoib_put_ah() inside
a lock is to move the ipoib_ah structs to a temporary list and then
put them outside of the lock.

I will create a new patch.

Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638

___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [openib-general] [PATCH]deadlock problem in ipoib

2005-02-14 Thread Roland Dreier
Shirley> This patch has fixed a deadlock problem: the caller calls
Shirley> ipoib_put_ah() while holding priv->lock. (In
Shirley> ipoib_free_ah() the same lock is reacquired.) This also
Shirley> fixes the ipoib_flush_paths() calls __patch_free()
Shirley> without holding priv->lock.

You definitely found some bugs but I think the real fix is to make
sure that ipoib_put_ah() is called -without- any locks held.  That's
because for future devices, ib_destroy_ah() may actually sleep
(although it won't for mthca).

The __path_free in unicast_arp_send() is actually safe to call inside
of the lock because we know that we just created the path struct, and
therefore it won't have an attached ah, so ipoib_put_ah() can never be
called.  (Actually it looks like there's a bug in path_rec_create()
also -- it doesn't set path->ah to NULL)

I think the proper fix for those places calling ipoib_put_ah() inside
a lock is to move the ipoib_ah structs to a temporary list and then
put them outside of the lock.

Thanks,
  Roland
___
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general