On Sun, 2008-02-03 at 09:10 -0800, Shirley Ma wrote:
> Hello Eli,
> 
> Can you send me a combined attachment patch for RC3-2.6.24 kernel so I
> can validate for both mthca and ehca here?
> 
> Thanks
> Shirley 


It is already in the latest OFED build, and also in the attached files.


IB/ipoib: Split CQs for IPOIB UD

This is a preparation for using an unsignalled QP in UD mode. It uses
a dedicated CQ for UD sends; that CQ is not armed, but is polled for
completions right after a packet is sent.
This patch and the following patches fix bugs 760 and 761.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---
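(Aside, not part of the patch: a minimal sketch of the split-CQ setup
idiom used below. rx_handler and tx_handler stand in for
ipoib_ib_rx_completion and ipoib_ib_tx_completion; note that only the
receive CQ is ever armed.)

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static int create_split_cqs(struct ib_device *ca, void *ctx,
			    ib_comp_handler rx_handler,
			    ib_comp_handler tx_handler,
			    int rsize, int ssize,
			    struct ib_cq **rcq, struct ib_cq **scq)
{
	*rcq = ib_create_cq(ca, rx_handler, NULL, ctx, rsize, 0);
	if (IS_ERR(*rcq))
		return PTR_ERR(*rcq);

	*scq = ib_create_cq(ca, tx_handler, NULL, ctx, ssize, 0);
	if (IS_ERR(*scq)) {
		ib_destroy_cq(*rcq);
		return PTR_ERR(*scq);
	}

	/* Arm the receive CQ only; send completions are reaped by
	 * polling the send CQ right after posting a send. */
	return ib_req_notify_cq(*rcq, IB_CQ_NEXT_COMP);
}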

Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-01-31 11:42:31.776503000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-01-31 14:11:01.107304000 +0200
@@ -254,7 +254,7 @@ repost:
 			   "for buf %d\n", wr_id);
 }
 
-static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	unsigned int wr_id = wc->wr_id;
@@ -279,13 +279,17 @@ static void ipoib_ib_handle_tx_wc(struct
 
 	dev_kfree_skb_any(tx_req->skb);
 
-	spin_lock_irqsave(&priv->tx_lock, flags);
+	if (need_lock)
+		spin_lock_irqsave(&priv->tx_lock, flags);
+
 	++priv->tx_tail;
 	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
 	    netif_queue_stopped(dev) &&
 	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
 		netif_wake_queue(dev);
-	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	if (need_lock)
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (wc->status != IB_WC_SUCCESS &&
 	    wc->status != IB_WC_WR_FLUSH_ERR)
@@ -294,6 +298,15 @@ static void ipoib_ib_handle_tx_wc(struct
 			   wc->status, wr_id, wc->vendor_err);
 }
 
+static void poll_tx(struct ipoib_dev_priv *priv, int need_lock)
+{
+	int n, i;
+
+	n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
+	for (i = 0; i < n; ++i)
+		ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock);
+}
+
 int ipoib_poll(struct napi_struct *napi, int budget)
 {
 	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
@@ -309,7 +322,7 @@ poll_more:
 		int max = (budget - done);
 
 		t = min(IPOIB_NUM_WC, max);
-		n = ib_poll_cq(priv->cq, t, priv->ibwc);
+		n = ib_poll_cq(priv->rcq, t, priv->ibwc);
 
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = priv->ibwc + i;
@@ -320,12 +333,8 @@ poll_more:
 					ipoib_cm_handle_rx_wc(dev, wc);
 				else
 					ipoib_ib_handle_rx_wc(dev, wc);
-			} else {
-				if (wc->wr_id & IPOIB_OP_CM)
-					ipoib_cm_handle_tx_wc(dev, wc);
-				else
-					ipoib_ib_handle_tx_wc(dev, wc);
-			}
+			} else
+				ipoib_cm_handle_tx_wc(priv->dev, wc);
 		}
 
 		if (n != t)
@@ -334,7 +343,7 @@ poll_more:
 
 	if (done < budget) {
 		netif_rx_complete(dev, napi);
-		if (unlikely(ib_req_notify_cq(priv->cq,
+		if (unlikely(ib_req_notify_cq(priv->rcq,
 					      IB_CQ_NEXT_COMP |
 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
 		    netif_rx_reschedule(dev, napi))
@@ -344,7 +353,7 @@ poll_more:
 	return done;
 }
 
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr)
 {
 	struct net_device *dev = dev_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -352,6 +361,13 @@ void ipoib_ib_completion(struct ib_cq *c
 	netif_rx_schedule(dev, &priv->napi);
 }
 
+void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+
+	poll_tx(priv, 1);
+}
+
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
@@ -471,6 +487,10 @@ void ipoib_send(struct net_device *dev, 
 			netif_stop_queue(dev);
 		}
 	}
+
+	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1))
+		poll_tx(priv, 0);
+
 	return;
 
 drop:
@@ -623,7 +643,7 @@ void ipoib_drain_cq(struct net_device *d
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i, n;
 	do {
-		n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+		n = ib_poll_cq(priv->rcq, IPOIB_NUM_WC, priv->ibwc);
 		for (i = 0; i < n; ++i) {
 			/*
 			 * Convert any successful completions to flush
@@ -642,7 +662,7 @@ void ipoib_drain_cq(struct net_device *d
 				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
 					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
 				else
-					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1);
 			}
 		}
 	} while (n == IPOIB_NUM_WC);
@@ -737,7 +757,7 @@ timeout:
 		msleep(1);
 	}
 
-	ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP);
 
 	return 0;
 }
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-31 11:42:32.043502000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-31 14:16:34.690314000 +0200
@@ -94,6 +94,8 @@ enum {
 	IPOIB_MCAST_FLAG_SENDONLY = 1,
 	IPOIB_MCAST_FLAG_BUSY 	  = 2,	/* joining or already joined */
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+	MAX_SEND_CQE              = 16,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -348,7 +350,8 @@ struct ipoib_dev_priv {
 	u16               pkey_index;
 	struct ib_pd  	 *pd;
 	struct ib_mr  	 *mr;
-	struct ib_cq  	 *cq;
+	struct ib_cq  	 *rcq;
+	struct ib_cq  	 *scq;
 	struct ib_qp  	 *qp;
 	u32           	  qkey;
 
@@ -368,7 +371,8 @@ struct ipoib_dev_priv {
 	struct ib_send_wr    tx_wr;
 	unsigned             tx_outstanding;
 
-	struct ib_wc ibwc[IPOIB_NUM_WC];
+	struct ib_wc 	     ibwc[IPOIB_NUM_WC];
+	struct ib_wc         send_wc[MAX_SEND_CQE];
 
 	struct list_head dead_ahs;
 
@@ -449,7 +453,8 @@ extern struct workqueue_struct *ipoib_wo
 /* functions */
 
 int ipoib_poll(struct napi_struct *napi, int budget);
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
+void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr);
+void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 				 struct ib_pd *pd, struct ib_ah_attr *attr);
@@ -697,7 +702,6 @@ static inline int ipoib_register_debugfs
 static inline void ipoib_unregister_debugfs(void) { }
 #endif
 
-
 #define ipoib_printk(level, priv, format, arg...)	\
 	printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
 #define ipoib_warn(priv, format, arg...)		\
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-01-31 11:42:32.175502000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-01-31 12:39:19.616627000 +0200
@@ -173,37 +173,42 @@ int ipoib_transport_dev_init(struct net_
 		goto out_free_pd;
 	}
 
-	size = ipoib_sendq_size + ipoib_recvq_size + 1;
+	size = ipoib_recvq_size;
 	ret = ipoib_cm_dev_init(dev);
 	if (!ret)
 		size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
 
-	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
-	if (IS_ERR(priv->cq)) {
-		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
+	priv->rcq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, dev, size, 0);
+	if (IS_ERR(priv->rcq)) {
+		printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
 		goto out_free_mr;
 	}
 
+	priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0);
+	if (IS_ERR(priv->scq)) {
+		printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+		goto out_free_rcq;
+	}
+
+
 	coal = kzalloc(sizeof *coal, GFP_KERNEL);
 	if (coal) {
 		coal->rx_coalesce_usecs = 10;
-		coal->tx_coalesce_usecs = 10;
 		coal->rx_max_coalesced_frames = 16;
-		coal->tx_max_coalesced_frames = 16;
 		dev->ethtool_ops->set_coalesce(dev, coal);
 		kfree(coal);
 	}
 
-	if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
-		goto out_free_cq;
+	if (ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP))
+		goto out_free_scq;
 
-	init_attr.send_cq = priv->cq;
-	init_attr.recv_cq = priv->cq;
+	init_attr.send_cq = priv->scq;
+	init_attr.recv_cq = priv->rcq;
 
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
 		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
-		goto out_free_cq;
+		goto out_free_scq;
 	}
 
 	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -219,8 +224,11 @@ int ipoib_transport_dev_init(struct net_
 
 	return 0;
 
-out_free_cq:
-	ib_destroy_cq(priv->cq);
+out_free_scq:
+	ib_destroy_cq(priv->scq);
+
+out_free_rcq:
+	ib_destroy_cq(priv->rcq);
 
 out_free_mr:
 	ib_dereg_mr(priv->mr);
@@ -243,7 +251,10 @@ void ipoib_transport_dev_cleanup(struct 
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 	}
 
-	if (ib_destroy_cq(priv->cq))
+	if (ib_destroy_cq(priv->scq))
+		ipoib_warn(priv, "ib_cq_destroy failed\n");
+
+	if (ib_destroy_cq(priv->rcq))
 		ipoib_warn(priv, "ib_cq_destroy failed\n");
 
 	ipoib_cm_dev_cleanup(dev);
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-01-31 11:42:31.770505000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-01-31 12:18:43.243122000 +0200
@@ -199,8 +199,8 @@ static struct ib_qp *ipoib_cm_create_rx_
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr attr = {
 		.event_handler = ipoib_cm_rx_event_handler,
-		.send_cq = priv->cq, /* For drain WR */
-		.recv_cq = priv->cq,
+		.send_cq = priv->rcq, /* For drain WR */
+		.recv_cq = priv->rcq,
 		.srq = priv->cm.srq,
 		.cap.max_send_wr = 1, /* For drain WR */
 		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
@@ -791,8 +791,8 @@ static struct ib_qp *ipoib_cm_create_tx_
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr attr = {
-		.send_cq		= priv->cq,
-		.recv_cq		= priv->cq,
+		.send_cq		= priv->rcq,
+		.recv_cq		= priv->rcq,
 		.srq			= priv->cm.srq,
 		.cap.max_send_wr	= ipoib_sendq_size,
 		.cap.max_send_sge	= 1,
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_etool.c	2008-01-31 11:42:32.052502000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c	2008-01-31 11:42:32.421466000 +0200
@@ -69,7 +69,7 @@ static int ipoib_set_coalesce(struct net
 	    coal->tx_max_coalesced_frames > 0xffff)
 		return -EINVAL;
 
-	ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+	ret = ib_modify_cq(priv->rcq, coal->rx_max_coalesced_frames,
 	coal->rx_coalesce_usecs);
 	if (ret) {
 			ipoib_dbg(priv, "failed modifying CQ\n");
Unsignalled UD QP

This patch uses an unsignalled QP for UD. Doing so reduces the number
of times the CQ has to be polled, and together with the polling we
already do on the TX CQ it reduces the per-send overhead and improves
small-message bandwidth.

For example, on my Intel machines, send throughput of 128-byte UDP
messages went up from 380 Mb/s to 508 Mb/s.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---
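(Aside, not part of the patch: a hedged sketch of the selective
signalling idiom; BATCH mirrors MAX_SEND_CQE. With sq_sig_type set to
IB_SIGNAL_REQ_WR, only WRs carrying IB_SEND_SIGNALED generate CQEs,
and since send completions arrive in posting order, one signalled CQE
retires all unsignalled WRs posted before it. That ordering is what
the tx_poll loop below relies on.)

#include <rdma/ib_verbs.h>

#define BATCH 16	/* mirrors MAX_SEND_CQE */

static int post_send_batched(struct ib_qp *qp, struct ib_send_wr *wr,
			     unsigned int head)
{
	struct ib_send_wr *bad_wr;

	/* Request a CQE on every BATCH-th post only; its completion
	 * implies completion of the unsignalled WRs before it. */
	if ((head & (BATCH - 1)) == BATCH - 1)
		wr->send_flags |= IB_SEND_SIGNALED;
	else
		wr->send_flags &= ~IB_SEND_SIGNALED;

	return ib_post_send(qp, wr, &bad_wr);
}
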
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -373,6 +373,7 @@ struct ipoib_dev_priv {
 
 	struct ib_wc 	     ibwc[IPOIB_NUM_WC];
 	struct ib_wc         send_wc[MAX_SEND_CQE];
+	unsigned int	     tx_poll;
 
 	struct list_head dead_ahs;
 
@@ -392,6 +393,8 @@ struct ipoib_dev_priv {
 	struct dentry *path_dentry;
 #endif
 	struct ipoib_ethtool_st etool;
+	struct timer_list poll_timer;
+	struct ib_ah *own_ah;
 };
 
 struct ipoib_ah {
@@ -454,7 +457,6 @@ extern struct workqueue_struct *ipoib_wo
 
 int ipoib_poll(struct napi_struct *napi, int budget);
 void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr);
-void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 				 struct ib_pd *pd, struct ib_ah_attr *attr);
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -254,12 +254,10 @@ repost:
 			   "for buf %d\n", wr_id);
 }
 
-static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock)
+static void _ipoib_ib_handle_tx_wc(struct net_device *dev, int wr_id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned int wr_id = wc->wr_id;
 	struct ipoib_tx_buf *tx_req;
-	unsigned long flags;
 
-	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
-		       wr_id, wc->status);
+	ipoib_dbg_data(priv, "send completion: id %d\n",
+		       wr_id);
@@ -272,39 +270,52 @@ static void ipoib_ib_handle_tx_wc(struct
 
 	tx_req = &priv->tx_ring[wr_id];
 
-	ipoib_dma_unmap_tx(priv->ca, tx_req);
-
-	++dev->stats.tx_packets;
-	dev->stats.tx_bytes += tx_req->skb->len;
-
-	dev_kfree_skb_any(tx_req->skb);
-
-	if (need_lock)
-		spin_lock_irqsave(&priv->tx_lock, flags);
-
+	if (tx_req->skb) {
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
+		++dev->stats.tx_packets;
+		dev->stats.tx_bytes += tx_req->skb->len;
+		dev_kfree_skb_any(tx_req->skb);
+	}
 	++priv->tx_tail;
 	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
 	    netif_queue_stopped(dev) &&
 	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
 		netif_wake_queue(dev);
+}
+
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id;
+	int i;
+
+	i = priv->tx_poll;
+	do {
+		i &= (ipoib_sendq_size - 1);
+		_ipoib_ib_handle_tx_wc(dev, i);
+	} while (i++ != wr_id);
+	priv->tx_poll = i & (ipoib_sendq_size - 1);
 
-	if (need_lock)
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+	if (unlikely(wc->status != IB_WC_SUCCESS &&
+		     wc->status != IB_WC_WR_FLUSH_ERR))
 
-	if (wc->status != IB_WC_SUCCESS &&
-	    wc->status != IB_WC_WR_FLUSH_ERR)
 		ipoib_warn(priv, "failed send event "
 			   "(status=%d, wrid=%d vend_err %x)\n",
 			   wc->status, wr_id, wc->vendor_err);
 }
 
-static void poll_tx(struct ipoib_dev_priv *priv, int need_lock)
+static void poll_tx(struct ipoib_dev_priv *priv)
 {
 	int n, i;
 
-	n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
-	for (i = 0; i < n; ++i)
-		ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock);
+	while (1) {
+		n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
+		for (i = 0; i < n; ++i)
+			ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
+
+		if (n < MAX_SEND_CQE)
+			break;
+	}
 }
 
 int ipoib_poll(struct napi_struct *napi, int budget)
@@ -361,11 +372,65 @@ void ipoib_ib_rx_completion(struct ib_cq
 	netif_rx_schedule(dev, &priv->napi);
 }
 
-void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr)
+static inline int post_zlen_send_wr(struct ipoib_dev_priv *priv, unsigned wrid)
+{
+	struct ib_send_wr wr = {
+		.opcode = IB_WR_SEND,
+		.send_flags = IB_SEND_SIGNALED,
+		.wr_id = wrid,
+	};
+	struct ib_send_wr *bad_wr;
+
+	if (!priv->own_ah)
+		return -EBUSY;
+
+	wr.wr.ud.ah = priv->own_ah;
+	wr.wr.ud.remote_qpn = priv->qp->qp_num;
+	return ib_post_send(priv->qp, &wr, &bad_wr);
+}
+
+static void ipoib_ib_tx_timer_func(unsigned long dev_ptr)
+{
+	struct net_device *dev = (struct net_device *)dev_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned long flags;
+	unsigned int wrid;
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	if (((int)priv->tx_tail - (int)priv->tx_head < 0) &&
+		time_after(jiffies, dev->trans_start + 10)) {
+		wrid = priv->tx_head & (ipoib_sendq_size - 1);
+		ipoib_dbg(priv, "posting zlen send, wrid = %d: head = %d, tail = %d\n", wrid,
+				priv->tx_head, priv->tx_tail);
+		priv->tx_ring[wrid].skb = NULL;
+		if (post_zlen_send_wr(priv, wrid))
+			ipoib_warn(priv, "failed to post zlen send\n");
+		else {
+			++priv->tx_head;
+			++priv->tx_outstanding;
+			ipoib_dbg(priv, "%s-%d: head = %d\n", __func__, __LINE__, priv->tx_head);
+		}
+	}
+	poll_tx(priv);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	mod_timer(&priv->poll_timer, jiffies + HZ / 2);
+}
+
+static void flush_tx_queue(struct ipoib_dev_priv *priv)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+	unsigned long flags;
+	unsigned int wrid;
 
-	poll_tx(priv, 1);
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	wrid = priv->tx_head & (ipoib_sendq_size - 1);
+	priv->tx_ring[wrid].skb = NULL;
+	if (!post_zlen_send_wr(priv, wrid)) {
+		++priv->tx_head;
+		++priv->tx_outstanding;
+	}
+	poll_tx(priv);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,
@@ -405,6 +470,11 @@ static inline int post_send(struct ipoib
 	} else
 		priv->tx_wr.opcode      = IB_WR_SEND;
 
+	if (unlikely((priv->tx_head & (MAX_SEND_CQE - 1)) == MAX_SEND_CQE - 1))
+		priv->tx_wr.send_flags |= IB_SEND_SIGNALED;
+	else
+		priv->tx_wr.send_flags &= ~IB_SEND_SIGNALED;
+
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
@@ -489,7 +559,7 @@ void ipoib_send(struct net_device *dev, 
 	}
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1))
-		poll_tx(priv, 0);
+		poll_tx(priv);
 
 	return;
 
@@ -530,6 +600,32 @@ void ipoib_reap_ah(struct work_struct *w
 				   round_jiffies_relative(HZ));
 }
 
+static int create_own_ah(struct ipoib_dev_priv *priv)
+{
+	struct ib_ah_attr attr = {
+		.dlid = priv->local_lid,
+		.port_num = priv->port,
+	};
+
+	if (priv->own_ah) {
+		ipoib_dbg(priv, "own ah already exists\n");
+		return -EINVAL;
+	}
+	priv->own_ah = ib_create_ah(priv->pd, &attr);
+	return IS_ERR(priv->own_ah);
+}
+
+static void destroy_own_ah(struct ipoib_dev_priv *priv)
+{
+	if (!priv->own_ah) {
+		ipoib_dbg(priv, "destroying an already destroyed own ah\n");
+		return;
+	}
+
+	ib_destroy_ah(priv->own_ah);
+	priv->own_ah = NULL;
+}
+
 int ipoib_ib_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -542,9 +638,17 @@ int ipoib_ib_dev_open(struct net_device 
 	}
 	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 
+	ret = create_own_ah(priv);
+	if (ret) {
+		priv->own_ah = NULL;
+		ipoib_warn(priv, "failed to create own ah\n");
+		return -1;
+	}
+
 	ret = ipoib_init_qp(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
+		destroy_own_ah(priv);
 		return -1;
 	}
 
@@ -566,6 +670,11 @@ int ipoib_ib_dev_open(struct net_device 
 	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
+	init_timer(&priv->poll_timer);
+	priv->poll_timer.function = ipoib_ib_tx_timer_func;
+	priv->poll_timer.data = (unsigned long)dev;
+	mod_timer(&priv->poll_timer, jiffies + HZ / 2);
+
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 
 	return 0;
@@ -662,7 +771,7 @@ void ipoib_drain_cq(struct net_device *d
 				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
 					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
 				else
-					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1);
+					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
 			}
 		}
 	} while (n == IPOIB_NUM_WC);
@@ -673,12 +782,14 @@ int ipoib_ib_dev_stop(struct net_device 
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
 	unsigned long begin;
-	struct ipoib_tx_buf *tx_req;
 	int i;
+	unsigned long flags;
 
+	del_timer_sync(&priv->poll_timer);
 	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 
 	ipoib_cm_dev_stop(dev);
+	flush_tx_queue(priv);
 
 	/*
 	 * Move our QP to the error state and then reinitialize in
@@ -700,32 +811,30 @@ int ipoib_ib_dev_stop(struct net_device 
 			 * assume the HW is wedged and just free up
 			 * all our pending work requests.
 			 */
-			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
-				tx_req = &priv->tx_ring[priv->tx_tail &
-							(ipoib_sendq_size - 1)];
-				ipoib_dma_unmap_tx(priv->ca, tx_req);
-				dev_kfree_skb_any(tx_req->skb);
-				++priv->tx_tail;
-				--priv->tx_outstanding;
-			}
-
 			for (i = 0; i < ipoib_recvq_size; ++i) {
 				struct ipoib_rx_buf *rx_req;
 
 				rx_req = &priv->rx_ring[i];
-				if (!rx_req->skb)
-					continue;
-				ib_dma_unmap_single(priv->ca,
-						    rx_req->mapping,
-						    IPOIB_BUF_SIZE,
-						    DMA_FROM_DEVICE);
-				dev_kfree_skb_any(rx_req->skb);
-				rx_req->skb = NULL;
+
+				if (rx_req->skb) {
+					ib_dma_unmap_single(priv->ca,
+							    rx_req->mapping,
+							    IPOIB_BUF_SIZE,
+							    DMA_FROM_DEVICE);
+					dev_kfree_skb_any(rx_req->skb);
+					rx_req->skb = NULL;
+				}
 			}
 
 			goto timeout;
 		}
 
+		if ((int) priv->tx_tail - (int) priv->tx_head < 0) {
+			spin_lock_irqsave(&priv->tx_lock, flags);
+			poll_tx(priv);
+			spin_unlock_irqrestore(&priv->tx_lock, flags);
+		}
+
 		ipoib_drain_cq(dev);
 
 		msleep(1);
@@ -734,6 +843,7 @@ int ipoib_ib_dev_stop(struct net_device 
 	ipoib_dbg(priv, "All sends and receives done.\n");
 
 timeout:
+	destroy_own_ah(priv);
 	qp_attr.qp_state = IB_QPS_RESET;
 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
 		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -153,7 +153,7 @@ int ipoib_transport_dev_init(struct net_
 			.max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
 			.max_recv_sge = 1
 		},
-		.sq_sig_type = IB_SIGNAL_ALL_WR,
+		.sq_sig_type = IB_SIGNAL_REQ_WR,
 		.qp_type     = IB_QPT_UD,
 		.create_flags = QP_CREATE_LSO,
 	};
@@ -184,7 +184,7 @@ int ipoib_transport_dev_init(struct net_
 		goto out_free_mr;
 	}
 
-	priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0);
+	priv->scq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
 	if (IS_ERR(priv->scq)) {
 		printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
 		goto out_free_rcq;
IB/ipoib: rx WQE draft in IPOIB UD
    
Put a prepared WQE in the private data to save time in
the receive flow.
    
Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---
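(Aside, not part of the patch: a hedged sketch of the "draft WR"
idea. The constant fields of the receive WR are filled in once at
init time, so the hot path only updates the buffer address and the
wr_id.)

#include <rdma/ib_verbs.h>

struct rx_draft {
	struct ib_recv_wr wr;
	struct ib_sge sge;
};

static void rx_draft_init(struct rx_draft *d, u32 lkey, u32 len)
{
	d->sge.lkey   = lkey;		/* constant: MR key */
	d->sge.length = len;		/* constant: buffer size */
	d->wr.next    = NULL;
	d->wr.sg_list = &d->sge;
	d->wr.num_sge = 1;
}

static int rx_draft_post(struct ib_qp *qp, struct rx_draft *d,
			 u64 dma_addr, u64 wr_id)
{
	struct ib_recv_wr *bad_wr;

	d->sge.addr = dma_addr;		/* the only per-packet fields */
	d->wr.wr_id = wr_id;
	return ib_post_recv(qp, &d->wr, &bad_wr);
}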

Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-01-28 10:12:28.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-01-28 10:18:39.000000000 +0200
@@ -92,21 +92,13 @@ void ipoib_free_ah(struct kref *kref)
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ib_sge list;
-	struct ib_recv_wr param;
 	struct ib_recv_wr *bad_wr;
 	int ret;
 
-	list.addr     = priv->rx_ring[id].mapping;
-	list.length   = IPOIB_BUF_SIZE;
-	list.lkey     = priv->mr->lkey;
-
-	param.next    = NULL;
-	param.wr_id   = id | IPOIB_OP_RECV;
-	param.sg_list = &list;
-	param.num_sge = 1;
+	priv->sglist_draft.addr = priv->rx_ring[id].mapping;
+	priv->rx_wr_draft.wr_id = id | IPOIB_OP_RECV;
 
-	ret = ib_post_recv(priv->qp, &param, &bad_wr);
+	ret = ib_post_recv(priv->qp, &priv->rx_wr_draft, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
 		ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
@@ -202,7 +194,7 @@ static void ipoib_ib_handle_rx_wc(struct
 	 * Drop packets that this interface sent, ie multicast packets
 	 * that the HCA has replicated.
 	 */
-	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+	if (unlikely(wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num))
 		goto repost;
 
 	/*
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-28 10:12:28.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-28 10:17:10.000000000 +0200
@@ -327,6 +327,8 @@ struct ipoib_dev_priv {
 	spinlock_t lock;
 
 	struct net_device *dev;
+	struct ib_recv_wr rx_wr_draft;
+	struct ib_sge sglist_draft;
 
 	struct napi_struct napi;
 
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-01-28 10:12:28.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-01-28 10:17:10.000000000 +0200
@@ -217,6 +217,13 @@ int ipoib_transport_dev_init(struct net_
 	priv->tx_wr.sg_list 	= priv->tx_sge;
 	priv->tx_wr.send_flags 	= IB_SEND_SIGNALED;
 
+	priv->rx_wr_draft.next = NULL;
+	priv->rx_wr_draft.sg_list = &priv->sglist_draft;
+	priv->rx_wr_draft.num_sge = 1;
+
+	priv->sglist_draft.length = IPOIB_BUF_SIZE;
+	priv->sglist_draft.lkey = priv->mr->lkey;
+
 	return 0;
 
 out_free_cq:
IB/ipoib: IPOIB rx post list
    
Post a list of RX buffers every 16 received packets. This should
reduce code cache thrashing by making fewer jumps between the HW
driver and ipoib. In any case, it improves the UD receive flow.
    
Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---
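(Aside, not part of the patch: a hedged sketch of the batching
scheme. RX_BATCH mirrors UD_POST_RCV_COUNT, and wrs[] is assumed to
be pre-linked through ->next at init time, exactly as done in the
ipoib_transport_dev_init() hunk below.)

#include <rdma/ib_verbs.h>

#define RX_BATCH 16	/* mirrors UD_POST_RCV_COUNT */

static int queue_rx_buffer(struct ib_qp *qp, struct ib_recv_wr *wrs,
			   unsigned int *outst, u64 addr, u64 wr_id)
{
	struct ib_recv_wr *bad_wr;

	wrs[*outst].sg_list->addr = addr;
	wrs[*outst].wr_id = wr_id;
	if (++*outst < RX_BATCH)
		return 0;	/* queued only, nothing posted yet */

	*outst = 0;
	/* One verbs call hands the whole chain to the HCA driver. */
	return ib_post_recv(qp, &wrs[0], &bad_wr);
}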

Index: ofed_kernel-2.6.11/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel-2.6.11.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-03 18:32:37.000000000 +0200
+++ ofed_kernel-2.6.11/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-03 18:34:34.000000000 +0200
@@ -97,6 +97,7 @@ enum {
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
 
 	MAX_SEND_CQE              = 16,
+	UD_POST_RCV_COUNT         = 16,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -326,9 +327,10 @@ struct ipoib_ethtool_st {
 struct ipoib_dev_priv {
 	spinlock_t lock;
 
-	struct net_device *dev;
-	struct ib_recv_wr rx_wr_draft;
-	struct ib_sge sglist_draft;
+	struct net_device      *dev;
+	struct ib_recv_wr	rx_wr_draft[UD_POST_RCV_COUNT];
+	struct ib_sge 		sglist_draft[UD_POST_RCV_COUNT];
+	unsigned int		rx_outst;
 
 	struct napi_struct napi;
 
Index: ofed_kernel-2.6.11/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofed_kernel-2.6.11.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-03 18:32:37.000000000 +0200
+++ ofed_kernel-2.6.11/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-03 18:33:44.000000000 +0200
@@ -89,23 +89,46 @@ void ipoib_free_ah(struct kref *kref)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static int ipoib_ib_post_receive(struct net_device *dev, int id)
+static void clean_pending_receives(struct ipoib_dev_priv *priv)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ib_recv_wr *bad_wr;
-	int ret;
-
-	priv->sglist_draft.addr = priv->rx_ring[id].mapping;
-	priv->rx_wr_draft.wr_id = id | IPOIB_OP_RECV;
+	int i;
+	int id;
 
-	ret = ib_post_recv(priv->qp, &priv->rx_wr_draft, &bad_wr);
-	if (unlikely(ret)) {
-		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+	for (i = 0; i < priv->rx_outst; ++i) {
+		id = priv->rx_wr_draft[i].wr_id & ~IPOIB_OP_RECV;
 		ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
-				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 		dev_kfree_skb_any(priv->rx_ring[id].skb);
 		priv->rx_ring[id].skb = NULL;
 	}
+	priv->rx_outst = 0;
+}
+
+static int ipoib_ib_post_receive(struct net_device *dev, int id)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_recv_wr *bad_wr;
+	int ret = 0;
+	int i = priv->rx_outst;
+
+	priv->sglist_draft[i].addr = priv->rx_ring[id].mapping;
+	priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV;
+	if (++priv->rx_outst == UD_POST_RCV_COUNT) {
+		ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr);
+
+		if (unlikely(ret)) {
+			ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+			while (bad_wr) {
+				id = bad_wr->wr_id & ~IPOIB_OP_RECV;
+				ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
+						    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+				dev_kfree_skb_any(priv->rx_ring[id].skb);
+				priv->rx_ring[id].skb = NULL;
+				bad_wr = bad_wr->next;
+			}
+		}
+		priv->rx_outst = 0;
+	}
 
 	return ret;
 }
@@ -791,6 +814,7 @@ int ipoib_ib_dev_stop(struct net_device 
 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
 		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
 
+	clean_pending_receives(priv);
 	/* Wait for all sends and receives to complete */
 	begin = jiffies;
 
Index: ofed_kernel-2.6.11/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofed_kernel-2.6.11.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-03 18:32:37.000000000 +0200
+++ ofed_kernel-2.6.11/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-03 18:33:44.000000000 +0200
@@ -226,12 +226,16 @@ int ipoib_transport_dev_init(struct net_
 	priv->tx_wr.sg_list 	= priv->tx_sge;
 	priv->tx_wr.send_flags 	= IB_SEND_SIGNALED;
 
-	priv->rx_wr_draft.next = NULL;
-	priv->rx_wr_draft.sg_list = &priv->sglist_draft;
-	priv->rx_wr_draft.num_sge = 1;
-
-	priv->sglist_draft.length = IPOIB_BUF_SIZE;
-	priv->sglist_draft.lkey = priv->mr->lkey;
+	for (i = 0; i < UD_POST_RCV_COUNT; ++i) {
+		priv->sglist_draft[i].length = IPOIB_BUF_SIZE;
+		priv->sglist_draft[i].lkey = priv->mr->lkey;
+
+		priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i];
+		priv->rx_wr_draft[i].num_sge = 1;
+		if (i < UD_POST_RCV_COUNT - 1)
+			priv->rx_wr_draft[i].next = &priv->rx_wr_draft[i + 1];
+	}
+	priv->rx_wr_draft[UD_POST_RCV_COUNT - 1].next = NULL;
 
 	return 0;
 
IB/ipoib: post to SRQ every n buffers

To reduce the overhead of posting receive buffers to the SRQ, we post
them in batches of 16.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---
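(Aside, not part of the patch: a hedged sketch of the SRQ ring
arithmetic, using the struct ipoib_cm_rx_wr type added below. The WRs
sit in a power-of-two ring kept linked through ->next; head is the
oldest WR not yet handed to the SRQ, and the chain from head to the
current WR is posted in one call once its ring distance reaches the
batch size. Unwinding bad_wr on failure is left to the caller, as in
the patch.)

static int srq_ring_post(struct ib_srq *srq, struct ipoib_cm_rx_wr *ring,
			 struct ipoib_cm_rx_wr **head, unsigned int id,
			 unsigned int ring_size, unsigned int batch)
{
	struct ipoib_cm_rx_wr *cur = &ring[id];
	struct ib_recv_wr *bad_wr;

	/* Re-link the ring: the previous WR points at us, we terminate. */
	ring[(id - 1) & (ring_size - 1)].wr.next = &cur->wr;
	cur->wr.next = NULL;

	if (((unsigned long)(cur - *head) & (ring_size - 1)) < batch)
		return 0;	/* keep accumulating */

	if (ib_post_srq_recv(srq, &(*head)->wr, &bad_wr))
		return -EIO;

	*head = &ring[(id + 1) & (ring_size - 1)];
	return 0;
}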


Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,6 +98,7 @@ enum {
 
 	MAX_SEND_CQE              = 16,
 	UD_POST_RCV_COUNT         = 16,
+	CM_POST_SRQ_COUNT         = 16,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -288,6 +289,11 @@ struct ipoib_cm_rx_buf {
 	u64 mapping[IPOIB_CM_RX_SG];
 };
 
+struct ipoib_cm_rx_wr {
+	struct ib_recv_wr	wr;
+	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
+};
+
 struct ipoib_cm_dev_priv {
 	struct ib_srq  	       *srq;
 	struct ipoib_cm_rx_buf *srq_ring;
@@ -311,6 +317,8 @@ struct ipoib_cm_dev_priv {
 	int			nonsrq_conn_qp;
 	int			max_cm_mtu;
 	int			num_frags;
+	struct ipoib_cm_rx_wr  *head;
+	struct ipoib_cm_rx_wr  *rx_wr_arr;
 };
 
 struct ipoib_ethtool_st {
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -88,24 +88,43 @@ static void ipoib_cm_dma_unmap_rx(struct
 		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 }
 
-static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
+static int ipoib_cm_post_receive_srq(struct net_device *dev, int id, int pi)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_recv_wr *bad_wr;
-	int i, ret;
-
-	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	int i, ret = 0;
+	struct ipoib_cm_rx_wr *cur;
+	struct ipoib_cm_rx_wr *prev;
+	int post;
+
+	ipoib_dbg_data(priv, "posting to id=%d, pi=%d\n", id, pi);
+	cur = &priv->cm.rx_wr_arr[id];
+	prev = &priv->cm.rx_wr_arr[(id - 1) & (ipoib_recvq_size - 1)];
+
+	prev->wr.next = &cur->wr;
+	cur->wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	cur->wr.next = NULL;
 
 	for (i = 0; i < priv->cm.num_frags; ++i)
-		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+		cur->rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
 
-	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
-	if (unlikely(ret)) {
-		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
-				      priv->cm.srq_ring[id].mapping);
-		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
-		priv->cm.srq_ring[id].skb = NULL;
+	post = pi || (((unsigned long)(cur - priv->cm.head) & (ipoib_recvq_size - 1))
+		      >= CM_POST_SRQ_COUNT);
+
+	if (post) {
+		ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.head->wr, &bad_wr);
+		if (unlikely(ret)) {
+			ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+			while (bad_wr) {
+				id = bad_wr->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
+				ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
+						      priv->cm.srq_ring[id].mapping);
+				dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+				priv->cm.srq_ring[id].skb = NULL;
+				bad_wr = bad_wr->next;
+			}
+		} else
+			priv->cm.head = &priv->cm.rx_wr_arr[(id + 1) & (ipoib_recvq_size - 1)];
 	}
 
 	return ret;
@@ -615,7 +634,7 @@ void ipoib_cm_handle_rx_wc(struct net_de
 
 repost:
 	if (has_srq) {
-		if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
+		if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id, 0)))
 			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
 				   "for buf %d\n", wr_id);
 	} else {
@@ -1432,20 +1451,36 @@ static void ipoib_cm_create_srq(struct n
 		return;
 	}
 
+	priv->cm.rx_wr_arr = kzalloc(ipoib_recvq_size *
+				     sizeof priv->cm.rx_wr_arr[0], GFP_KERNEL);
+	if (!priv->cm.rx_wr_arr) {
+		ipoib_warn(priv, "failed allocating SRQ wr array\n");
+		goto destroy_srq;
+	}
+
+
 	priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
 				    GFP_KERNEL);
 	if (!priv->cm.srq_ring) {
 		printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
 		       priv->ca->name, ipoib_recvq_size);
-		ib_destroy_srq(priv->cm.srq);
-		priv->cm.srq = NULL;
+		goto free_wr_array;
 	}
+
+	return;
+
+free_wr_array:
+	kfree(priv->cm.rx_wr_arr);
+	priv->cm.rx_wr_arr = NULL;
+destroy_srq:
+	ib_destroy_srq(priv->cm.srq);
+	priv->cm.srq = NULL;
 }
 
 int ipoib_cm_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i, ret;
+	int i, ret, j;
 	struct ib_device_attr attr;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1486,6 +1521,19 @@ int ipoib_cm_dev_init(struct net_device 
 	for (i = 0; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].lkey	= priv->mr->lkey;
 
+	for (j = 0; priv->cm.rx_wr_arr && j < ipoib_recvq_size; ++j) {
+		for (i = 0; i < priv->cm.num_frags; ++i)
+			priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey;
+
+		priv->cm.rx_wr_arr[j].rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
+		for (i = 1; i < priv->cm.num_frags; ++i)
+			priv->cm.rx_wr_arr[j].rx_sge[i].length = PAGE_SIZE;
+
+		priv->cm.rx_wr_arr[j].wr.sg_list = priv->cm.rx_wr_arr[j].rx_sge;
+		priv->cm.rx_wr_arr[j].wr.num_sge = priv->cm.num_frags;
+	}
+	priv->cm.head = priv->cm.rx_wr_arr;
+
 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
 	for (i = 1; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].length = PAGE_SIZE;
@@ -1502,7 +1550,7 @@ int ipoib_cm_dev_init(struct net_device 
 				ipoib_cm_dev_cleanup(dev);
 				return -ENOMEM;
 			}
-			if (ipoib_cm_post_receive_srq(dev, i)) {
+			if (ipoib_cm_post_receive_srq(dev, i, 1)) {
 				ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
 				ipoib_cm_dev_cleanup(dev);
 				return -EIO;
@@ -1534,4 +1582,6 @@ void ipoib_cm_dev_cleanup(struct net_dev
 
 	ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
 	priv->cm.srq_ring = NULL;
+
+	kfree(priv->cm.rx_wr_arr);
 }