New RDS RDMA selftests exposed a hang when tearing down the IB network configs. The hang is caused by the shutdown worker thread sleeping in wait_event(), which blocks other work items in the queue. Fix this by changing wait_event() to wait_event_timeout() and looping until the wait check succeeds, rescheduling the send and receive tasklets each time the wait times out.
Signed-off-by: Allison Henderson <[email protected]> --- net/rds/ib_cm.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0c64c504f79db..6b40345ba44d1 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1038,6 +1038,19 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp) return ret; } +static unsigned long rds_ib_conn_path_shutdown_check_wait(struct rds_conn_path *cp) +{ + struct rds_connection *conn = cp->cp_conn; + struct rds_ib_connection *ic = conn->c_transport_data; + + return (!ic->i_cm_id || + (rds_ib_ring_empty(&ic->i_recv_ring) && + (atomic_read(&ic->i_signaled_sends) == 0) && + (atomic_read(&ic->i_fastreg_inuse_count)) == 0 && + (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR))) ? 0 + : msecs_to_jiffies(1000); +} + /* * This is so careful about only cleaning up resources that were built up * so that it can be called at any point during startup. In fact it @@ -1078,11 +1091,13 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) * sends to complete we're ensured that there will be no * more tx processing. */ - wait_event(rds_ib_ring_empty_wait, - rds_ib_ring_empty(&ic->i_recv_ring) && - (atomic_read(&ic->i_signaled_sends) == 0) && - (atomic_read(&ic->i_fastreg_inuse_count) == 0) && - (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); + while (!wait_event_timeout(rds_ib_ring_empty_wait, + rds_ib_conn_path_shutdown_check_wait(cp) == 0, + msecs_to_jiffies(1000))) { + tasklet_schedule(&ic->i_send_tasklet); + tasklet_schedule(&ic->i_recv_tasklet); + } + tasklet_kill(&ic->i_send_tasklet); tasklet_kill(&ic->i_recv_tasklet); -- 2.25.1
