Commit 333f796235a527 ("tipc: fix a race condition leading to
subscriber refcnt bug") reveals a soft lockup while acquiring
nametbl_lock.

The soft lock is the call chain of tipc_nametbl_withdraw(), when it
performs the tipc_conn_kref_release() as it tries to grab nametbl_lock
again while holding it already.
tipc_nametbl_withdraw
  spin_lock_bh(&tn->nametbl_lock);
  tipc_nametbl_remove_publ
     tipc_nameseq_remove_publ
       tipc_subscrp_report_overlap
         tipc_subscrp_send_event
            tipc_conn_sendmsg
Here, the (test_bit(CF_CONNECTED, &con->flags)) fails, leading to the
else case where do do a conn_put() and that triggers the cleanup as
refcount reached 0. Leading the call chain below:
tipc_conn_kref_release
    tipc_sock_release
      tipc_conn_release
         tipc_subscrb_delete
            tipc_subscrp_delete
               tipc_nametbl_unsubscribe
                  spin_lock_bh(&tn->nametbl_lock);  << Soft Lockup >>

The above lockup is caused due to two reasons:
1. During module exit tipc_exit_net() first calls tipc_topsrv_stop()
   followed by tipc_nametbl_withdraw() in scope of tipc_net_stop().
2. tipc_server_stop() grabs and releases the idr_lock twice for every
   connection as it needs to execute functions that might sleep.
   The first lock is to find the connection and the second sets the
   connection flag, removes it from list and decrements the refcount.
   There is a possibility for another thread to grab the connection
   in between the two, thus owning the connection and triggering the
   cleanup while decrementing the refcount later.

In this commit, we perform:
- call tipc_nametbl_withdraw() before tipc_topsrv_stop()
- In tipc_server_stop():
  we remove the connection id's of all connections from connection
  list in the scope of the idr_lock.
  copy s->idr_in_use to stack, as this is being updated in the loop.
  Thus no other thread will find a connection which has unset
  CF_CONNECTED in its flags.

Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber 
refcnt bug")
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com>
---
 net/tipc/core.c   |  4 ++++
 net/tipc/net.c    |  2 --
 net/tipc/server.c | 19 +++++++++++++++----
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 236b043a4156..3ea1452e2f06 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -93,6 +93,10 @@ static int __net_init tipc_init_net(struct net *net)
 
 static void __net_exit tipc_exit_net(struct net *net)
 {
+       struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+       tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0,
+                             tn->own_addr);
        tipc_topsrv_stop(net);
        tipc_net_stop(net);
        tipc_bcast_stop(net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 28bf4feeb81c..92696cc6e763 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -130,8 +130,6 @@ void tipc_net_stop(struct net *net)
        if (!tn->own_addr)
                return;
 
-       tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0,
-                             tn->own_addr);
        rtnl_lock();
        tipc_bearer_stop(net);
        tipc_node_stop(net);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 215849ce453d..22a4caf66065 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -622,22 +622,33 @@ int tipc_server_start(struct tipc_server *s)
 
 void tipc_server_stop(struct tipc_server *s)
 {
+       int max_in_use = s->idr_in_use;
        struct tipc_conn *con;
        int total = 0;
        int id;
 
        spin_lock_bh(&s->idr_lock);
-       for (id = 0; total < s->idr_in_use; id++) {
+       for (id = 0; total < max_in_use; id++) {
                con = idr_find(&s->conn_idr, id);
                if (con) {
                        total++;
-                       spin_unlock_bh(&s->idr_lock);
-                       tipc_close_conn(con);
-                       spin_lock_bh(&s->idr_lock);
+                       if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
+                               idr_remove(&s->conn_idr, con->conid);
+                               s->idr_in_use--;
+                       }
                }
        }
        spin_unlock_bh(&s->idr_lock);
 
+       for (id = 0; total < max_in_use; id++) {
+               con = idr_find(&s->conn_idr, id);
+               if (con) {
+                       total++;
+                       kernel_sock_shutdown(con->sock, SHUT_RDWR);
+                       conn_put(con);
+               }
+       }
+
        tipc_work_stop(s);
        kmem_cache_destroy(s->rcvbuf_cache);
        idr_destroy(&s->conn_idr);
-- 
2.1.4


------------------------------------------------------------------------------
_______________________________________________
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion

Reply via email to