We trigger a soft lockup as we grab nametbl_lock twice if the node is processing pending nametable updates while the module is exiting.
The associated call chain is as follows: tipc_named_rcv() Grabs nametbl_lock tipc_update_nametbl() (publish/withdraw) tipc_nametbl_insert_publ()/remove_publ() tipc_nameseq_insert_publ()/ tipc_subscrp_report_overlap(TIPC_PUBLISHED/TIPC_WITHDRAWN) tipc_subscrp_send_event(..,TIPC_SUBSCR_TIMEOUT,..) tipc_conn_sendmsg() tipc_conn_lookup() got a connection, refcount == 2. << At the same time, another CPU can execute tipc_server_stop(), resetting the connection flags but failing to clean up because refcount == 1. Now, on our CPU, since (con->flags != CF_CONNECTED), we trigger the cleanup at conn_put(), leading to: >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> Until now, a caller triggering the connection release while holding nametbl_lock ended in a soft lockup, because tipc_conn_kref_release() grabs the lock again at socket cleanup. In this commit, we perform the connection cleanup in a work queue context without holding any locks. 
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 54c23ecccd26..ebb75afcc870 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -61,6 +61,7 @@ * @outqueue_lock: control access to the outqueue * @outqueue: list of connection objects for its server * @swork: send work item + * @cwork: cleanup work */ struct tipc_conn { struct kref kref; @@ -74,6 +75,7 @@ struct tipc_conn { struct list_head outqueue; spinlock_t outqueue_lock; struct work_struct swork; + struct work_struct cwork; }; /* An entry waiting to be sent */ @@ -88,9 +90,10 @@ static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_sock_release(struct tipc_conn *con); -static void tipc_conn_kref_release(struct kref *kref) +/* tipc_conn_cleanup - cleanup the connections */ +static void tipc_conn_cleanup(struct work_struct *work) { - struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_conn *con = container_of(work, struct tipc_conn, cwork); struct sockaddr_tipc *saddr = con->server->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -108,10 +111,19 @@ static void tipc_conn_kref_release(struct kref *kref) con->sock = NULL; } - tipc_clean_outqueues(con); kfree(con); } +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + + tipc_clean_outqueues(con); + + /* all socket operations need to be done outside locks */ + schedule_work(&con->cwork); +} + static void conn_put(struct tipc_conn *con) { kref_put(&con->kref, tipc_conn_kref_release); @@ -232,6 +244,7 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) spin_lock_init(&con->outqueue_lock); INIT_WORK(&con->swork, tipc_send_work); INIT_WORK(&con->rwork, tipc_recv_work); + 
INIT_WORK(&con->cwork, tipc_conn_cleanup); spin_lock_bh(&s->idr_lock); ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); -- 2.1.4 ------------------------------------------------------------------------------ _______________________________________________ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion