On Sun, May 31, 2009 at 02:34:56PM +0300, Or Gerlitz wrote: > .... Looking on the code, I'm not sure why the non sucess/flush path > of ipoib_cm_handle_tx_wc() must access the neighbour while > ipoib_ib_handle_tx_wc can get a way with only a warning print... > do we agree that accessing the neigbour from the cm tx completion > flow is buggy?
But the fundamental problem is that the ipoib_neigh structures aren't read and written using consitent locking (particularly in ipoib_neigh_cleanup()). What about introducing a set of locks for no other purpose than protecting ipoib_neigh structures? Something like this (which is only intended to convey the idea - it's obviously incomplete): NOT-Signed-off-by: Arthur Kepner <[email protected]> --- diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 753a983..cc20074 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -403,6 +403,30 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu) return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; } +struct ipoib_neigh_lock +{ + spinlock_t sl; +}__attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define IPOIB_LOCK_SHIFT 6 +#define IPOIB_LOCK_SIZE (1 << IPOIB_LOCK_SHIFT) +#define IPOIB_LOCK_MASK (IPOIB_LOCK_SIZE -1) + +static struct ipoib_neigh_lock +ipoib_neigh_locks[IPOIB_LOCK_SIZE] __cacheline_aligned; + +static inline void lock_ipoib_neigh(unsigned int hval) +{ + spin_lock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl); +} + +static inline void unlock_ipoib_neigh(unsigned int hval) +{ + spin_unlock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl); +} + +unsigned int ipoib_neigh_hval(struct neighbour *n); + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. The ALIGN() expression here makes diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ab2c192..685b664 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -45,6 +45,7 @@ #include <linux/ip.h> #include <linux/in.h> +#include <linux/jhash.h> #include <net/dst.h> @@ -844,6 +845,9 @@ static void ipoib_neigh_cleanup(struct neighbour *n) struct ipoib_dev_priv *priv = netdev_priv(n->dev); unsigned long flags; struct ipoib_ah *ah = NULL; + unsigned int hval = ipoib_neigh_hval(n); + + lock_ipoib_neigh(hval); neigh = *to_ipoib_neigh(n); if (neigh) @@ -864,6 +868,8 @@ static void ipoib_neigh_cleanup(struct neighbour *n) spin_unlock_irqrestore(&priv->lock, flags); + unlock_ipoib_neigh(hval); + if (ah) ipoib_put_ah(ah); } @@ -899,6 +905,13 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) kfree(neigh); } +static unsigned int ipoib_neigh_rnd; + +unsigned int ipoib_neigh_hval(struct neighbour *n) +{ + return jhash(n->primary_key, n->tbl->key_len, ipoib_neigh_rnd); +} + static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) { parms->neigh_cleanup = ipoib_neigh_cleanup; @@ -1408,6 +1421,8 @@ static int __init ipoib_init_module(void) ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif + get_random_bytes(&ipoib_neigh_rnd, sizeof(ipoib_neigh_rnd)); + /* * When copying small received packets, we only copy from the * linear data part of the SKB, so we rely on this condition. _______________________________________________ general mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
