Re: [PATCH net-next 08/10] net/smc: introduce a delay

2017-09-20 Thread Ursula Braun


On 09/20/2017 04:03 PM, Leon Romanovsky wrote:
> On Wed, Sep 20, 2017 at 01:58:11PM +0200, Ursula Braun wrote:
>> The number of outstanding work requests is limited. If all work
>> requests are in use, tx processing is postponed to another scheduling
>> of the tx worker. Switch to a delayed worker to have a gap for tx
>> completion queue events before the next retry.
>>
> 
> How will the delay prevent resource exhaustion?
> 
> Thanks
> 

SMC runs with a fixed number of in-flight work requests per QP (constant
SMC_WR_BUF_CNT); that bound is what prevents resource exhaustion. If all
work requests are currently in use, sending another one must wait until
an outstanding work request is confirmed via the send completion queue.
If sending is attempted from a context that is not allowed to wait, the
tx_worker is scheduled instead.
The delay added by this patch does not do the limiting itself; it just
gives the send completion queue time to report finished work requests,
avoiding a burst of unsuccessful retries while the "all work requests in
use" condition still holds.
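
For illustration only (this is not the SMC code; demo_conn, demo_try_send
and DEMO_TX_DELAY are made-up names), here is a minimal sketch of the
pattern: a bounded pool of send credits, where a caller that must not
sleep backs off via a delayed worker instead of spinning:

#include <linux/atomic.h>
#include <linux/workqueue.h>

#define DEMO_TX_DELAY HZ        /* one second, as with SMC_TX_WORK_DELAY */

struct demo_conn {
        atomic_t wr_credits;            /* free work request slots */
        struct delayed_work tx_work;    /* deferred send retry */
};

/* Claim one work request slot; -EBUSY when all are in use. */
static int demo_try_send(struct demo_conn *conn)
{
        if (atomic_dec_if_positive(&conn->wr_credits) < 0)
                return -EBUSY;
        /* post the work request here; its completion handler
         * returns the credit via atomic_inc(&conn->wr_credits)
         */
        return 0;
}

/* In a context that must not sleep: defer instead of spinning. */
static void demo_send_or_defer(struct demo_conn *conn)
{
        if (demo_try_send(conn) == -EBUSY)
                schedule_delayed_work(&conn->tx_work, DEMO_TX_DELAY);
}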




Re: [PATCH net-next 08/10] net/smc: introduce a delay

2017-09-20 Thread Leon Romanovsky
On Wed, Sep 20, 2017 at 01:58:11PM +0200, Ursula Braun wrote:
> The number of outstanding work requests is limited. If all work
> requests are in use, tx processing is postponed to another scheduling
> of the tx worker. Switch to a delayed worker to have a gap for tx
> completion queue events before the next retry.
>

How will the delay prevent resource exhaustion?

Thanks




[PATCH net-next 08/10] net/smc: introduce a delay

2017-09-20 Thread Ursula Braun
The number of outstanding work requests is limited. If all work
requests are in use, tx processing is postponed to another scheduling
of the tx worker. Switch to a delayed worker to have a gap for tx
completion queue events before the next retry.

Signed-off-by: Ursula Braun 
---
 net/smc/smc.h       |  2 +-
 net/smc/smc_close.c | 12 +++++++-----
 net/smc/smc_tx.c    | 12 ++++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0ccd6fa387ad 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -149,7 +149,7 @@ struct smc_connection {
atomic_t    sndbuf_space;   /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
spinlock_t  send_lock;  /* protect wr_sends */
-   struct work_struct  tx_work;/* retry of smc_cdc_msg_send */
+   struct delayed_work tx_work;/* retry of smc_cdc_msg_send */
 
struct smc_host_cdc_msg local_rx_ctrl;  /* filled during event_handl.
 * .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..5201bc103bd8 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -208,7 +208,7 @@ int smc_close_active(struct smc_sock *smc)
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
-   cancel_work_sync(&conn->tx_work);
+   cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
@@ -234,7 +234,7 @@ int smc_close_active(struct smc_sock *smc)
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
-   cancel_work_sync(&conn->tx_work);
+   cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_err != ECONNABORTED) {
/* confirm close from peer */
@@ -263,7 +263,9 @@ int smc_close_active(struct smc_sock *smc)
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
-   cancel_work_sync(&conn->tx_work);
+   release_sock(sk);
+   cancel_delayed_work_sync(&conn->tx_work);
+   lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
smc_close_wait_tx_pends(smc);
@@ -425,7 +427,7 @@ int smc_close_shutdown_write(struct smc_sock *smc)
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
-   cancel_work_sync(&conn->tx_work);
+   cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* send close wr request */
rc = smc_close_wr(conn);
@@ -439,7 +441,7 @@ int smc_close_shutdown_write(struct smc_sock *smc)
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
-   cancel_work_sync(&conn->tx_work);
+   cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* confirm close from peer */
rc = smc_close_wr(conn);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3c656beb8820..3866573288dd 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,6 +24,8 @@
 #include "smc_cdc.h"
 #include "smc_tx.h"
 
+#define SMC_TX_WORK_DELAY  HZ
+
 /* sndbuf producer ***/
 
 /* callback implementation for sk.sk_write_space()
@@ -406,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
-   schedule_work(&conn->tx_work);
+   schedule_delayed_work(&conn->tx_work,
+                         SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -430,7 +433,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
  */
 static void smc_tx_work(struct work_struct *work)
 {
-   struct smc_connection *conn = container_of(work,
+   struct smc_connection *conn = container_of(to_delayed_work(work),
                                               struct smc_connection,
                                               tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -468,7 +471,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
if (!rc)
rc = smc_cdc_msg_send(conn, wr_buf, pend);
if (rc < 0) {
-   schedule_work(&conn->tx_work);
+   schedule_delayed_work(&conn->tx_work,
+                         SMC_TX_WORK_DELAY);
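
A note on the container_of() change in smc_tx_work() above: struct
delayed_work embeds a plain struct work_struct, and the work function
still receives a pointer to that embedded member, so it must be converted
with to_delayed_work() before container_of() can recover the surrounding
connection. A stripped-down sketch (demo_ctx and demo_tx_work are made-up
names, not the SMC ones):

#include <linux/workqueue.h>

struct demo_ctx {
        struct delayed_work tx_work;    /* was a plain work_struct before */
};

static void demo_tx_work(struct work_struct *work)
{
        /* 'work' points at the embedded work_struct; convert to the
         * delayed_work first, then walk back to the embedding demo_ctx
         */
        struct demo_ctx *ctx = container_of(to_delayed_work(work),
                                            struct demo_ctx, tx_work);
        /* ... retry the transmit using ctx ... */
}

Such a worker would be set up with INIT_DELAYED_WORK(&ctx->tx_work,
demo_tx_work) and kicked with schedule_delayed_work(&ctx->tx_work, delay).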