> Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> Subject: [PATCH RFC] use common cq for ipoib cm send side
> 
> The following untested patch moves all TX processing in IPoIB CM to common CQ.
> This should help reduce the number of interrupts for bi-directional traffic
> (such as TCP). Is this a good idea? What do others think?
> 
> Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

FYI, this was just thinking aloud.  The version below works fine here but the
performance gain seems to be very small (about 1%).  The gain with NAPI might
be bigger but this is yet to be tested.  I'll continue looking into this.

Feedback wellcome.

 ipoib.h      |   10 +++++--
 ipoib_cm.c   |   78 +++++++++++++++--------------------------------------------
 ipoib_ib.c   |   28 ++++++++++++---------
 ipoib_main.c |    2 -
 4 files changed, 45 insertions(+), 73 deletions(-)

------------

Use common CQ for all TX QPs: keep a per-device counter out outstanding
tx WRs, and stop the interface when this counter reaches the send queue size, to
avoid CQ overruns.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

---

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h 
b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb885ee..ef703c7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -99,9 +99,9 @@ enum {
 
 #define        IPOIB_OP_RECV   (1ul << 31)
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
-#define        IPOIB_CM_OP_SRQ (1ul << 30)
+#define        IPOIB_OP_CM     (1ul << 30)
 #else
-#define        IPOIB_CM_OP_SRQ (0)
+#define        IPOIB_OP_CM     (0)
 #endif
 
 /* structs */
@@ -144,7 +144,6 @@ struct ipoib_cm_rx {
 
 struct ipoib_cm_tx {
        struct ib_cm_id     *id;
-       struct ib_cq        *cq;
        struct ib_qp        *qp;
        struct list_head     list;
        struct net_device   *dev;
@@ -233,6 +232,7 @@ struct ipoib_dev_priv {
        unsigned             tx_tail;
        struct ib_sge        tx_sge;
        struct ib_send_wr    tx_wr;
+       unsigned             tx_outstanding;
 
        struct ib_wc ibwc[IPOIB_NUM_WC];
 
@@ -439,6 +439,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
 void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
                           unsigned int mtu);
 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
 #else
 
 struct ipoib_cm_tx;
@@ -527,6 +528,9 @@ static inline void ipoib_cm_handle_rx_wc(struct net_device 
*dev, struct ib_wc *w
 {
 }
 
+static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc 
*wc)
+{
+}
 #endif
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 8ee6f06..af36562 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -85,7 +85,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int 
id)
        struct ib_recv_wr *bad_wr;
        int i, ret;
 
-       priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+       priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
        for (i = 0; i < IPOIB_CM_RX_SG; ++i)
                priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
@@ -346,7 +346,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int 
hdr_space,
 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+       unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
        struct sk_buff *skb;
        struct ipoib_cm_rx *p;
        unsigned long flags;
@@ -433,7 +433,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
        priv->tx_sge.addr             = addr;
        priv->tx_sge.length           = len;
 
-       priv->tx_wr.wr_id             = wr_id;
+       priv->tx_wr.wr_id             = wr_id | IPOIB_OP_CM;
 
        return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
 }
@@ -484,20 +484,19 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
                dev->trans_start = jiffies;
                ++tx->tx_head;
 
-               if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
+               if (++priv->tx_outstanding == ipoib_sendq_size) {
                        ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
                                  tx->qp->qp_num);
                        netif_stop_queue(dev);
-                       set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
                }
        }
 }
 
-static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx 
*tx,
-                                 struct ib_wc *wc)
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       unsigned int wr_id = wc->wr_id;
+       struct ipoib_cm_tx *tx = wc->qp->qp_context;
+       unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
        struct ipoib_tx_buf *tx_req;
        unsigned long flags;
 
@@ -522,11 +521,10 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, 
struct ipoib_cm_tx *tx
 
        spin_lock_irqsave(&priv->tx_lock, flags);
        ++tx->tx_tail;
-       if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
-           tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
-               clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+       if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+           netif_queue_stopped(dev) &&
+           test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                netif_wake_queue(dev);
-       }
 
        if (wc->status != IB_WC_SUCCESS &&
            wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -549,11 +547,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, 
struct ipoib_cm_tx *tx
                        tx->neigh = NULL;
                }
 
-               /* queue would be re-started anyway when TX is destroyed,
-                * but it makes sense to do it ASAP here. */
-               if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
-                       netif_wake_queue(dev);
-
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
                        queue_work(ipoib_workqueue, &priv->cm.reap_task);
@@ -567,19 +560,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, 
struct ipoib_cm_tx *tx
        spin_unlock_irqrestore(&priv->tx_lock, flags);
 }
 
-static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
-{
-       struct ipoib_cm_tx *tx = tx_ptr;
-       int n, i;
-
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-       do {
-               n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
-               for (i = 0; i < n; ++i)
-                       ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
-       } while (n == IPOIB_NUM_WC);
-}
-
 int ipoib_cm_dev_open(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -699,17 +679,18 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, 
struct ib_cm_event *even
        return 0;
 }
 
-static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct 
ib_cq *cq)
+static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct 
ipoib_cm_tx *tx)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_init_attr attr = {};
        attr.recv_cq = priv->cq;
+       attr.send_cq = priv->cq;
        attr.srq = priv->cm.srq;
        attr.cap.max_send_wr = ipoib_sendq_size;
        attr.cap.max_send_sge = 1;
        attr.sq_sig_type = IB_SIGNAL_ALL_WR;
        attr.qp_type = IB_QPT_RC;
-       attr.send_cq = cq;
+       attr.qp_context = tx;
        return ib_create_qp(priv->pd, &attr);
 }
 
@@ -789,21 +770,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
                goto err_tx;
        }
 
-       p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
-                            ipoib_sendq_size + 1);
-       if (IS_ERR(p->cq)) {
-               ret = PTR_ERR(p->cq);
-               ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
-               goto err_cq;
-       }
-
-       ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
-       if (ret) {
-               ipoib_warn(priv, "failed to request completion notification: 
%d\n", ret);
-               goto err_req_notify;
-       }
-
-       p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
+       p->qp = ipoib_cm_create_tx_qp(p->dev, p);
        if (IS_ERR(p->qp)) {
                ret = PTR_ERR(p->qp);
                ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
@@ -840,12 +807,8 @@ err_modify:
 err_id:
        p->id = NULL;
        ib_destroy_qp(p->qp);
-err_req_notify:
 err_qp:
        p->qp = NULL;
-       ib_destroy_cq(p->cq);
-err_cq:
-       p->cq = NULL;
 err_tx:
        return ret;
 }
@@ -854,6 +817,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 {
        struct ipoib_dev_priv *priv = netdev_priv(p->dev);
        struct ipoib_tx_buf *tx_req;
+       unsigned long flags;
 
        ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
                  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
@@ -864,12 +828,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
        if (p->qp)
                ib_destroy_qp(p->qp);
 
-       if (p->cq)
-               ib_destroy_cq(p->cq);
-
-       if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
-               netif_wake_queue(p->dev);
-
        if (p->tx_ring) {
                while ((int) p->tx_tail - (int) p->tx_head < 0) {
                        tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 
1)];
@@ -877,6 +835,12 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
                                         DMA_TO_DEVICE);
                        dev_kfree_skb_any(tx_req->skb);
                        ++p->tx_tail;
+                       spin_lock_irqsave(&priv->tx_lock, flags);
+                       if (unlikely(--priv->tx_outstanding == ipoib_sendq_size 
>> 1) &&
+                           netif_queue_stopped(p->dev) &&
+                           test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+                               netif_wake_queue(p->dev);
+                       spin_unlock_irqrestore(&priv->tx_lock, flags);
                }
 
                kfree(p->tx_ring);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c 
b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f2aa923..19a3d3e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -266,11 +266,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, 
struct ib_wc *wc)
 
        spin_lock_irqsave(&priv->tx_lock, flags);
        ++priv->tx_tail;
-       if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
-           priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
-               clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+       if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+           netif_queue_stopped(dev) &&
+           test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                netif_wake_queue(dev);
-       }
        spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        if (wc->status != IB_WC_SUCCESS &&
@@ -282,12 +281,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, 
struct ib_wc *wc)
 
 static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
 {
-       if (wc->wr_id & IPOIB_CM_OP_SRQ)
-               ipoib_cm_handle_rx_wc(dev, wc);
-       else if (wc->wr_id & IPOIB_OP_RECV)
-               ipoib_ib_handle_rx_wc(dev, wc);
-       else
-               ipoib_ib_handle_tx_wc(dev, wc);
+       if (wc->wr_id & IPOIB_OP_CM) {
+               if (wc->wr_id & IPOIB_OP_RECV)
+                       ipoib_cm_handle_rx_wc(dev, wc);
+               else
+                       ipoib_cm_handle_tx_wc(dev, wc);
+       } else {
+               if (wc->wr_id & IPOIB_OP_RECV)
+                       ipoib_ib_handle_rx_wc(dev, wc);
+               else
+                       ipoib_ib_handle_tx_wc(dev, wc);
+       }
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
@@ -370,10 +374,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff 
*skb,
                address->last_send = priv->tx_head;
                ++priv->tx_head;
 
-               if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+               if (++priv->tx_outstanding == ipoib_sendq_size) {
                        ipoib_dbg(priv, "TX ring full, stopping kernel net 
queue\n");
                        netif_stop_queue(dev);
-                       set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
                }
        }
 }
@@ -549,6 +552,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
                                                    DMA_TO_DEVICE);
                                dev_kfree_skb_any(tx_req->skb);
                                ++priv->tx_tail;
+                               --priv->tx_outstanding;
                        }
 
                        for (i = 0; i < ipoib_recvq_size; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 19e82db..7c7b136 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -900,7 +900,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device 
*ca, int port)
                goto out_rx_ring_cleanup;
        }
 
-       /* priv->tx_head & tx_tail are already 0 */
+       /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
        if (ipoib_ib_dev_init(dev, ca, port))
                goto out_tx_ring_cleanup;

-- 
MST

_______________________________________________
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to