On Sun, May 31, 2009 at 02:34:56PM +0300, Or Gerlitz wrote:
> .... Looking on the code, I'm not sure why the non sucess/flush path 
> of ipoib_cm_handle_tx_wc() must access the neighbour while 
> ipoib_ib_handle_tx_wc can get a way with only a warning print... 
> do we agree that accessing the neigbour from the cm tx completion 
> flow is buggy?

But the fundamental problem is that the ipoib_neigh structures 
aren't read and written using consitent locking (particularly 
in ipoib_neigh_cleanup()). 

What about introducing a set of locks for no other purpose 
than protecting ipoib_neigh structures? Something like this
(which is only intended to convey the idea - it's obviously 
incomplete):


NOT-Signed-off-by: Arthur Kepner <[email protected]>

--- 

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h 
b/drivers/infiniband/ulp/ipoib/ipoib.h
index 753a983..cc20074 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -403,6 +403,30 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
        return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
 }
 
+struct ipoib_neigh_lock
+{
+       spinlock_t sl;
+}__attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define IPOIB_LOCK_SHIFT       6
+#define IPOIB_LOCK_SIZE                (1 << IPOIB_LOCK_SHIFT)
+#define IPOIB_LOCK_MASK                (IPOIB_LOCK_SIZE -1)
+
+static struct ipoib_neigh_lock 
+ipoib_neigh_locks[IPOIB_LOCK_SIZE] __cacheline_aligned;
+
+static inline void lock_ipoib_neigh(unsigned int hval)
+{
+       spin_lock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl);
+}
+
+static inline void unlock_ipoib_neigh(unsigned int hval)
+{
+       spin_unlock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl);
+}
+
+unsigned int ipoib_neigh_hval(struct neighbour *n);
+
 /*
  * We stash a pointer to our private neighbour information after our
  * hardware address in neigh->ha.  The ALIGN() expression here makes
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ab2c192..685b664 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -45,6 +45,7 @@
 
 #include <linux/ip.h>
 #include <linux/in.h>
+#include <linux/jhash.h>
 
 #include <net/dst.h>
 
@@ -844,6 +845,9 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
        struct ipoib_dev_priv *priv = netdev_priv(n->dev);
        unsigned long flags;
        struct ipoib_ah *ah = NULL;
+       unsigned int hval = ipoib_neigh_hval(n);
+
+       lock_ipoib_neigh(hval);
 
        neigh = *to_ipoib_neigh(n);
        if (neigh)
@@ -864,6 +868,8 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
+       unlock_ipoib_neigh(hval);
+
        if (ah)
                ipoib_put_ah(ah);
 }
@@ -899,6 +905,13 @@ void ipoib_neigh_free(struct net_device *dev, struct 
ipoib_neigh *neigh)
        kfree(neigh);
 }
 
+static unsigned int ipoib_neigh_rnd;
+
+unsigned int ipoib_neigh_hval(struct neighbour *n)
+{
+        return jhash(n->primary_key, n->tbl->key_len, ipoib_neigh_rnd);
+}
+
 static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms 
*parms)
 {
        parms->neigh_cleanup = ipoib_neigh_cleanup;
@@ -1408,6 +1421,8 @@ static int __init ipoib_init_module(void)
        ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif
 
+       get_random_bytes(&ipoib_neigh_rnd, sizeof(ipoib_neigh_rnd));
+
        /*
         * When copying small received packets, we only copy from the
         * linear data part of the SKB, so we rely on this condition.
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to