Re: [PATCH] IPoIB: serialize changing on tx_outstanding

2015-10-08 Thread Leon Romanovsky
>>> +   spin_lock_irqsave(&priv->lock, flags);
>>> +   --priv->tx_outstanding;
>>> +   if (netif_queue_stopped(dev))
>>> +   netif_wake_queue(dev);
>>> +   spin_unlock_irqrestore(&priv->lock, flags);
>>
>> Why are you locking the netif_* calls?
>
>
> Yes, I intended to do that.  This makes the access to tx_outstanding and
> the reopening of the send queue happen in the same atomic section, which
> is the expected behavior.
> Otherwise,  we may have the following problem:
> #time order
>
> thread1 (on cpu1)                thread2 (on cpu2)
> lock
> modify/check tx_outstanding
> unlock
>                                  lock
>                                  modify/check tx_outstanding
>                                  unlock
> reopen queue
>                                  stop queue
>
>
> So we actually want to reopen the send queue, but the result is that we
> stopped it.
Thanks for the explanation.

>
> thanks,
> wengang
>
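As an illustration of the pattern Wengang describes above, here is a minimal
userspace sketch (made-up names, a pthread mutex standing in for priv->lock,
simplified stop/wake conditions); the point is that the counter update and the
stop/wake decision sit inside the same critical section, so one CPU's wake
decision cannot interleave with another CPU's stop decision:

    /* Userspace analogy only, not the IPoIB code. */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int tx_outstanding;
    static bool queue_stopped;
    static const int sendq_size = 4;

    /* send path: count the new work request and, if the ring is now
     * full, stop the queue -- decision made under the same lock. */
    static void send_one(void)
    {
            pthread_mutex_lock(&lock);
            if (++tx_outstanding == sendq_size)
                    queue_stopped = true;
            pthread_mutex_unlock(&lock);
    }

    /* completion path: drop the count and, if there is room again,
     * reopen the queue -- also decided under the lock. */
    static void complete_one(void)
    {
            pthread_mutex_lock(&lock);
            if (--tx_outstanding < sendq_size && queue_stopped)
                    queue_stopped = false;
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            send_one();
            complete_one();
            printf("tx_outstanding=%d stopped=%d\n",
                   tx_outstanding, queue_stopped);
            return 0;
    }

If the stop/wake decision were taken after dropping the lock, the interleaving
shown in the time diagram above becomes possible.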


Re: [PATCH] IPoIB: serialize changing on tx_outstanding

2015-10-07 Thread Wengang Wang

Hi Leon,

thanks for review.

On 2015/10/08 12:33, Leon Romanovsky wrote:

On Mon, Sep 28, 2015 at 01:42:10PM +0800, Wengang Wang wrote:

Changes to tx_outstanding should be protected by a spinlock or made with
atomic operations.

Such log is found in dmesg:

Sep 16 14:20:53 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034733, 
tx_tail 1034733, tx_outstanding 359 ipoib_sendq_size: 512
Sep 16 14:21:33 naep11x06 kernel: ib0: transmit timeout: latency 9560 msecs
Sep 16 14:21:33 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512
Sep 16 14:21:38 naep11x06 kernel: ib0: transmit timeout: latency 14568 msecs
Sep 16 14:21:38 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512

And the send queue of ib0 stayed full. When the transmit timeout is reported,
the queue is reported as "stopped", but the IPoIB tx_head and tx_tail point
to the same value. I am not able to see the corresponding numbers in
ipoib_cm_tx (for CM) because I have no vmcore. Though I am not quite sure
this is caused by parallel access to tx_outstanding (send path vs. interrupt
path), we really need to serialize changes to tx_outstanding.

This patch also makes sure tx_outstanding is increased before post_send is
called, to avoid a possible decrease happening before the increase in case
the increase runs after the interrupt handler.
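A compressed illustration of that "decrease before increase" hazard follows
(sequential userspace sketch with made-up names; in the real driver the two
sides are the send path and the completion interrupt on different CPUs):

    #include <stdio.h>

    static int tx_outstanding;

    /* Stands in for the TX completion handler, which may run as soon as
     * the work request has been posted. */
    static void completion_handler(void)
    {
            --tx_outstanding;
    }

    int main(void)
    {
            /* Old ordering: post_send() first, increment afterwards.  If
             * the completion fires in between, the counter transiently
             * underflows. */
            completion_handler();   /* runs before the sender's increment */
            printf("after early completion: %d\n", tx_outstanding); /* -1 */
            ++tx_outstanding;       /* sender's increment arrives late */

            /* Patched ordering: account for the work request up front,
             * then post; the completion never sees a stale count. */
            tx_outstanding = 0;
            ++tx_outstanding;       /* increment before post_send() */
            completion_handler();   /* decrement back to 0 */
            printf("increment-first ordering: %d\n", tx_outstanding);
            return 0;
    }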

Signed-off-by: Wengang Wang 
---
  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 40 +++--
  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 24 ++--
  2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c78dc16..044da94 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
+   unsigned long flags;
  
  	if (unlikely(skb->len > tx->mtu)) {

ipoib_warn(priv, "packet len %d (> %d) too long to send, 
dropping\n",
@@ -742,27 +743,36 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
skb_orphan(skb);
skb_dst_drop(skb);
  
+   spin_lock_irqsave(&priv->lock, flags);

+   if (++priv->tx_outstanding == ipoib_sendq_size) {
+   ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
+ tx->qp->qp_num);
+   netif_stop_queue(dev);
+   }
+   spin_unlock_irqrestore(&priv->lock, flags);
+   if (netif_queue_stopped(dev)) {
+   rc = ib_req_notify_cq(priv->send_cq,
+   IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+   if (rc < 0)
+   ipoib_warn(priv, "request notify on send CQ failed\n");
+   else if (rc)
+   ipoib_send_comp_handler(priv->send_cq, dev);
+   }
+
rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
+   spin_lock_irqsave(&priv->lock, flags);
+   --priv->tx_outstanding;
+   if (netif_queue_stopped(dev))
+   netif_wake_queue(dev);
+   spin_unlock_irqrestore(&priv->lock, flags);

Why are you locking the netif_* calls?


Yes, I intended to do that.  This makes the access to tx_outstanding and
the reopening of the send queue happen in the same atomic section, which
is the expected behavior.

Otherwise,  we may have the following problem:
#time order

thread1 (on cpu1)                thread2 (on cpu2)
lock
modify/check tx_outstanding
unlock
                                 lock
                                 modify/check tx_outstanding
                                 unlock
reopen queue
                                 stop queue


So we actually want to reopen the send queue, but the result is that we
stopped it.


thanks,
wengang


ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
++tx->tx_head;
-
-   if (++priv->tx_outstanding == ipoib_sendq_size) {
-   ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
- tx->qp->qp_num);
-   netif_stop_queue(dev);
-   rc = ib_req_notify_cq(priv->send_cq,
-   IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
-   if (rc < 0)
-   ipoib_warn(priv, "request notify on send CQ 
failed\n");
-   else if (rc)
-   ipoib_send_comp_handler(priv->send_cq, dev);
-   }
}
  

Re: [PATCH] IPoIB: serialize changing on tx_outstanding

2015-10-07 Thread Wengang Wang

Hi,
Any comment on this patch?

thanks,
wengang

On 2015/09/28 13:42, Wengang Wang wrote:

Changes to tx_outstanding should be protected by a spinlock or made with
atomic operations.

Such log is found in dmesg:

Sep 16 14:20:53 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034733, 
tx_tail 1034733, tx_outstanding 359 ipoib_sendq_size: 512
Sep 16 14:21:33 naep11x06 kernel: ib0: transmit timeout: latency 9560 msecs
Sep 16 14:21:33 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512
Sep 16 14:21:38 naep11x06 kernel: ib0: transmit timeout: latency 14568 msecs
Sep 16 14:21:38 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512

And the send queue of ib0 stayed full. When the transmit timeout is reported,
the queue is reported as "stopped", but the IPoIB tx_head and tx_tail point
to the same value. I am not able to see the corresponding numbers in
ipoib_cm_tx (for CM) because I have no vmcore. Though I am not quite sure
this is caused by parallel access to tx_outstanding (send path vs. interrupt
path), we really need to serialize changes to tx_outstanding.

This patch also makes sure tx_outstanding is increased before post_send is
called, to avoid a possible decrease happening before the increase in case
the increase runs after the interrupt handler.

Signed-off-by: Wengang Wang 
---
  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 40 +++--
  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 24 ++--
  2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c78dc16..044da94 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
+   unsigned long flags;
  
  	if (unlikely(skb->len > tx->mtu)) {

ipoib_warn(priv, "packet len %d (> %d) too long to send, 
dropping\n",
@@ -742,27 +743,36 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
skb_orphan(skb);
skb_dst_drop(skb);
  
+   spin_lock_irqsave(&priv->lock, flags);

+   if (++priv->tx_outstanding == ipoib_sendq_size) {
+   ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
+ tx->qp->qp_num);
+   netif_stop_queue(dev);
+   }
+   spin_unlock_irqrestore(&priv->lock, flags);
+   if (netif_queue_stopped(dev)) {
+   rc = ib_req_notify_cq(priv->send_cq,
+   IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+   if (rc < 0)
+   ipoib_warn(priv, "request notify on send CQ failed\n");
+   else if (rc)
+   ipoib_send_comp_handler(priv->send_cq, dev);
+   }
+
rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
+   spin_lock_irqsave(&priv->lock, flags);
+   --priv->tx_outstanding;
+   if (netif_queue_stopped(dev))
+   netif_wake_queue(dev);
+   spin_unlock_irqrestore(&priv->lock, flags);
ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
++tx->tx_head;
-
-   if (++priv->tx_outstanding == ipoib_sendq_size) {
-   ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
- tx->qp->qp_num);
-   netif_stop_queue(dev);
-   rc = ib_req_notify_cq(priv->send_cq,
-   IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
-   if (rc < 0)
-   ipoib_warn(priv, "request notify on send CQ 
failed\n");
-   else if (rc)
-   ipoib_send_comp_handler(priv->send_cq, dev);
-   }
}
  }
  
@@ -796,10 +806,13 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)

netif_tx_lock(dev);
  
  	++tx->tx_tail;

+
+   spin_lock_irqsave(&priv->lock, flags);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
+   spin_unlock_irqrestore(&priv->lock, flags);
  
  	if (wc->status != IB_WC_SUCCESS &&

wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -1169,6 +1182,7 @@ static void ipoib_cm_tx_destroy(struct 

Re: [PATCH] IPoIB: serialize changing on tx_outstanding

2015-10-07 Thread Leon Romanovsky
On Mon, Sep 28, 2015 at 01:42:10PM +0800, Wengang Wang wrote:
> Changes to tx_outstanding should be protected by a spinlock or made with
> atomic operations.
> 
> Such log is found in dmesg:
> 
> Sep 16 14:20:53 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034733, 
> tx_tail 1034733, tx_outstanding 359 ipoib_sendq_size: 512
> Sep 16 14:21:33 naep11x06 kernel: ib0: transmit timeout: latency 9560 msecs
> Sep 16 14:21:33 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
> tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512
> Sep 16 14:21:38 naep11x06 kernel: ib0: transmit timeout: latency 14568 msecs
> Sep 16 14:21:38 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
> tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512
> 
> And the send queue of ib0 stayed full. When the transmit timeout is reported,
> the queue is reported as "stopped", but the IPoIB tx_head and tx_tail point
> to the same value. I am not able to see the corresponding numbers in
> ipoib_cm_tx (for CM) because I have no vmcore. Though I am not quite sure
> this is caused by parallel access to tx_outstanding (send path vs. interrupt
> path), we really need to serialize changes to tx_outstanding.
> 
> This patch also makes sure tx_outstanding is increased before post_send is
> called, to avoid a possible decrease happening before the increase in case
> the increase runs after the interrupt handler.
> 
> Signed-off-by: Wengang Wang 
> ---
>  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 40 
> +++--
>  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 24 ++--
>  2 files changed, 50 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
> b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> index c78dc16..044da94 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> @@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
> *skb, struct ipoib_cm_
>   struct ipoib_dev_priv *priv = netdev_priv(dev);
>   struct ipoib_tx_buf *tx_req;
>   int rc;
> + unsigned long flags;
>  
>   if (unlikely(skb->len > tx->mtu)) {
>   ipoib_warn(priv, "packet len %d (> %d) too long to send, 
> dropping\n",
> @@ -742,27 +743,36 @@ void ipoib_cm_send(struct net_device *dev, struct 
> sk_buff *skb, struct ipoib_cm_
>   skb_orphan(skb);
>   skb_dst_drop(skb);
>  
> + spin_lock_irqsave(&priv->lock, flags);
> + if (++priv->tx_outstanding == ipoib_sendq_size) {
> + ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
> queue\n",
> +   tx->qp->qp_num);
> + netif_stop_queue(dev);
> + }
> + spin_unlock_irqrestore(&priv->lock, flags);
> + if (netif_queue_stopped(dev)) {
> + rc = ib_req_notify_cq(priv->send_cq,
> + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
> + if (rc < 0)
> + ipoib_warn(priv, "request notify on send CQ failed\n");
> + else if (rc)
> + ipoib_send_comp_handler(priv->send_cq, dev);
> + }
> +
>   rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
>   if (unlikely(rc)) {
>   ipoib_warn(priv, "post_send failed, error %d\n", rc);
>   ++dev->stats.tx_errors;
> + spin_lock_irqsave(&priv->lock, flags);
> + --priv->tx_outstanding;
> + if (netif_queue_stopped(dev))
> + netif_wake_queue(dev);
> + spin_unlock_irqrestore(&priv->lock, flags);
Why are you locking the netif_* calls?
>   ipoib_dma_unmap_tx(priv, tx_req);
>   dev_kfree_skb_any(skb);
>   } else {
>   dev->trans_start = jiffies;
>   ++tx->tx_head;
> -
> - if (++priv->tx_outstanding == ipoib_sendq_size) {
> - ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
> queue\n",
> -   tx->qp->qp_num);
> - netif_stop_queue(dev);
> - rc = ib_req_notify_cq(priv->send_cq,
> - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
> - if (rc < 0)
> - ipoib_warn(priv, "request notify on send CQ 
> failed\n");
> - else if (rc)
> - ipoib_send_comp_handler(priv->send_cq, dev);
> - }
>   }
>  }
>  
> @@ -796,10 +806,13 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, 
> struct ib_wc *wc)
>   netif_tx_lock(dev);
>  
>   ++tx->tx_tail;
> +
> + spin_lock_irqsave(&priv->lock, flags);
>   if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
>   netif_queue_stopped(dev) &&
> + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
>   netif_wake_queue(dev);
> + spin_unlock_irqrestore(&priv->lock, flags);
>  
>   if 

[PATCH] IPoIB: serialize changing on tx_outstanding

2015-09-27 Thread Wengang Wang
Changes to tx_outstanding should be protected by a spinlock or made with
atomic operations.

Such log is found in dmesg:

Sep 16 14:20:53 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034733, 
tx_tail 1034733, tx_outstanding 359 ipoib_sendq_size: 512
Sep 16 14:21:33 naep11x06 kernel: ib0: transmit timeout: latency 9560 msecs
Sep 16 14:21:33 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512
Sep 16 14:21:38 naep11x06 kernel: ib0: transmit timeout: latency 14568 msecs
Sep 16 14:21:38 naep11x06 kernel: ib0: queue stopped 1, tx_head 1034854, 
tx_tail 1034854, tx_outstanding 511 ipoib_sendq_size: 512

And the send queue of ib0 stayed full. When the transmit timeout is reported,
the queue is reported as "stopped", but the IPoIB tx_head and tx_tail point
to the same value. I am not able to see the corresponding numbers in
ipoib_cm_tx (for CM) because I have no vmcore. Though I am not quite sure
this is caused by parallel access to tx_outstanding (send path vs. interrupt
path), we really need to serialize changes to tx_outstanding.

This patch also makes sure tx_outstanding is increased before post_send is
called, to avoid a possible decrease happening before the increase in case
the increase runs after the interrupt handler.
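For comparison with the other option mentioned above (atomic operations
instead of a spinlock), a userspace sketch using C11 atomics is shown below;
the names are made up and the netif_* calls appear only as comments. An atomic
counter serializes the arithmetic itself, but not the combined "check counter
and stop/wake the queue" step, which is the part the spinlock version also
covers.

    /* Userspace sketch with C11 atomics; not the proposed patch. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int tx_outstanding;
    static const int sendq_size = 512;

    static void send_path(void)
    {
            /* The atomic increment itself cannot be torn or lost ... */
            if (atomic_fetch_add(&tx_outstanding, 1) + 1 == sendq_size) {
                    /* ... but this stop decision can still interleave with
                     * a concurrent wake decision on another CPU, which is
                     * the inversion the spinlock version closes. */
                    /* netif_stop_queue(dev); */
            }
    }

    static void completion_path(void)
    {
            if (atomic_fetch_sub(&tx_outstanding, 1) - 1 == sendq_size >> 1) {
                    /* netif_wake_queue(dev); */
            }
    }

    int main(void)
    {
            send_path();
            completion_path();
            printf("tx_outstanding = %d\n", atomic_load(&tx_outstanding));
            return 0;
    }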

Signed-off-by: Wengang Wang 
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 40 +++--
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 24 ++--
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c78dc16..044da94 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
+   unsigned long flags;
 
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, 
dropping\n",
@@ -742,27 +743,36 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff 
*skb, struct ipoib_cm_
skb_orphan(skb);
skb_dst_drop(skb);
 
+   spin_lock_irqsave(&priv->lock, flags);
+   if (++priv->tx_outstanding == ipoib_sendq_size) {
+   ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
+ tx->qp->qp_num);
+   netif_stop_queue(dev);
+   }
+   spin_unlock_irqrestore(&priv->lock, flags);
+   if (netif_queue_stopped(dev)) {
+   rc = ib_req_notify_cq(priv->send_cq,
+   IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+   if (rc < 0)
+   ipoib_warn(priv, "request notify on send CQ failed\n");
+   else if (rc)
+   ipoib_send_comp_handler(priv->send_cq, dev);
+   }
+
rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
+   spin_lock_irqsave(&priv->lock, flags);
+   --priv->tx_outstanding;
+   if (netif_queue_stopped(dev))
+   netif_wake_queue(dev);
+   spin_unlock_irqrestore(&priv->lock, flags);
ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
++tx->tx_head;
-
-   if (++priv->tx_outstanding == ipoib_sendq_size) {
-   ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net 
queue\n",
- tx->qp->qp_num);
-   netif_stop_queue(dev);
-   rc = ib_req_notify_cq(priv->send_cq,
-   IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
-   if (rc < 0)
-   ipoib_warn(priv, "request notify on send CQ 
failed\n");
-   else if (rc)
-   ipoib_send_comp_handler(priv->send_cq, dev);
-   }
}
 }
 
@@ -796,10 +806,13 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct 
ib_wc *wc)
netif_tx_lock(dev);
 
++tx->tx_tail;
+
+   spin_lock_irqsave(&priv->lock, flags);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
+   spin_unlock_irqrestore(&priv->lock, flags);
 
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -1169,6 +1182,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
struct ipoib_dev_priv *priv = netdev_priv(p->dev);