timewait sockets have a complex refcounting logic.
Once we realize it should be similar to established and
syn_recv sockets, we can use sk_nulls_del_node_init_rcu()
and remove inet_twsk_unhash()

In particular, deferred inet_twsk_put() added in commit
13475a30b66cd ("tcp: connect() race with timewait reuse")
looks unecessary : When removing a timewait socket from
ehash or bhash, caller must own a reference on the socket
anyway.

Signed-off-by: Eric Dumazet <eduma...@google.com>
---
 include/net/inet_hashtables.h    |  4 ++--
 include/net/inet_timewait_sock.h |  6 ++----
 net/ipv4/inet_hashtables.c       | 31 ++++++++++-------------------
 net/ipv4/inet_timewait_sock.c    | 42 ++++++----------------------------------
 net/ipv6/inet6_hashtables.c      |  6 +-----
 5 files changed, 21 insertions(+), 68 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index b73c88a19dd4..b07d126694a7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
 
 void inet_hashinfo_init(struct inet_hashinfo *h);
 
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw);
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
+void __inet_hash(struct sock *sk, struct sock *osk);
 void inet_hash(struct sock *sk);
 void inet_unhash(struct sock *sk);
 
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 360c4802288d..96f52a4711c8 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const 
struct sock *sk)
 void inet_twsk_free(struct inet_timewait_sock *tw);
 void inet_twsk_put(struct inet_timewait_sock *tw);
 
-int inet_twsk_unhash(struct inet_timewait_sock *tw);
-
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
-                         struct inet_hashinfo *hashinfo);
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+                          struct inet_hashinfo *hashinfo);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
                                           struct inet_timewait_death_row *dr,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5f9b063bbe8a..e58840330da7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -343,7 +343,6 @@ static int __inet_check_established(struct 
inet_timewait_death_row *death_row,
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw = NULL;
-       int twrefcnt = 0;
 
        spin_lock(lock);
 
@@ -371,12 +370,10 @@ static int __inet_check_established(struct 
inet_timewait_death_row *death_row,
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
        if (tw) {
-               twrefcnt = inet_twsk_unhash(tw);
+               sk_nulls_del_node_init_rcu((struct sock *)tw);
                NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
        }
        spin_unlock(lock);
-       if (twrefcnt)
-               inet_twsk_put(tw);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
        if (twp) {
@@ -384,7 +381,6 @@ static int __inet_check_established(struct 
inet_timewait_death_row *death_row,
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
                inet_twsk_deschedule(tw);
-
                inet_twsk_put(tw);
        }
        return 0;
@@ -403,13 +399,12 @@ static u32 inet_sk_port_offset(const struct sock *sk)
                                          inet->inet_dport);
 }
 
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct hlist_nulls_head *list;
        struct inet_ehash_bucket *head;
        spinlock_t *lock;
-       int twrefcnt = 0;
 
        WARN_ON(!sk_unhashed(sk));
 
@@ -420,23 +415,22 @@ int __inet_hash_nolisten(struct sock *sk, struct 
inet_timewait_sock *tw)
 
        spin_lock(lock);
        __sk_nulls_add_node_rcu(sk, list);
-       if (tw) {
-               WARN_ON(sk->sk_hash != tw->tw_hash);
-               twrefcnt = inet_twsk_unhash(tw);
+       if (osk) {
+               WARN_ON(sk->sk_hash != osk->sk_hash);
+               sk_nulls_del_node_init_rcu(osk);
        }
        spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       return twrefcnt;
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash(struct sock *sk, struct sock *osk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct inet_listen_hashbucket *ilb;
 
        if (sk->sk_state != TCP_LISTEN)
-               return __inet_hash_nolisten(sk, tw);
+               return __inet_hash_nolisten(sk, osk);
 
        WARN_ON(!sk_unhashed(sk));
        ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -445,7 +439,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock 
*tw)
        __sk_nulls_add_node_rcu(sk, &ilb->head);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
        spin_unlock(&ilb->lock);
-       return 0;
 }
 EXPORT_SYMBOL(__inet_hash);
 
@@ -492,7 +485,6 @@ int __inet_hash_connect(struct inet_timewait_death_row 
*death_row,
        struct inet_bind_bucket *tb;
        int ret;
        struct net *net = sock_net(sk);
-       int twrefcnt = 1;
 
        if (!snum) {
                int i, remaining, low, high, port;
@@ -560,18 +552,15 @@ ok:
                inet_bind_hash(sk, tb, port);
                if (sk_unhashed(sk)) {
                        inet_sk(sk)->inet_sport = htons(port);
-                       twrefcnt += __inet_hash_nolisten(sk, tw);
+                       __inet_hash_nolisten(sk, (struct sock *)tw);
                }
                if (tw)
-                       twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
+                       inet_twsk_bind_unhash(tw, hinfo);
                spin_unlock(&head->lock);
 
                if (tw) {
                        inet_twsk_deschedule(tw);
-                       while (twrefcnt) {
-                               twrefcnt--;
-                               inet_twsk_put(tw);
-                       }
+                       inet_twsk_put(tw);
                }
 
                ret = 0;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2ffbd16b79e0..92cd4d50404e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -18,28 +18,6 @@
 
 
 /**
- *     inet_twsk_unhash - unhash a timewait socket from established hash
- *     @tw: timewait socket
- *
- *     unhash a timewait socket from established hash, if hashed.
- *     ehash lock must be held by caller.
- *     Returns 1 if caller should call inet_twsk_put() after lock release.
- */
-int inet_twsk_unhash(struct inet_timewait_sock *tw)
-{
-       if (hlist_nulls_unhashed(&tw->tw_node))
-               return 0;
-
-       hlist_nulls_del_rcu(&tw->tw_node);
-       sk_nulls_node_init(&tw->tw_node);
-       /*
-        * We cannot call inet_twsk_put() ourself under lock,
-        * caller must call it for us.
-        */
-       return 1;
-}
-
-/**
  *     inet_twsk_bind_unhash - unhash a timewait socket from bind hash
  *     @tw: timewait socket
  *     @hashinfo: hashinfo pointer
@@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw)
  *     bind hash lock must be held by caller.
  *     Returns 1 if caller should call inet_twsk_put() after lock release.
  */
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
                          struct inet_hashinfo *hashinfo)
 {
        struct inet_bind_bucket *tb = tw->tw_tb;
 
        if (!tb)
-               return 0;
+               return;
 
        __hlist_del(&tw->tw_bind_node);
        tw->tw_tb = NULL;
        inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-       /*
-        * We cannot call inet_twsk_put() ourself under lock,
-        * caller must call it for us.
-        */
-       return 1;
+       __sock_put((struct sock *)tw);
 }
 
 /* Must be called with locally disabled BHs. */
 static void inet_twsk_kill(struct inet_timewait_sock *tw)
 {
        struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
-       struct inet_bind_hashbucket *bhead;
-       int refcnt;
-       /* Unlink from established hashes. */
        spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+       struct inet_bind_hashbucket *bhead;
 
        spin_lock(lock);
-       refcnt = inet_twsk_unhash(tw);
+       sk_nulls_del_node_init_rcu((struct sock *)tw);
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
@@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
                        hashinfo->bhash_size)];
 
        spin_lock(&bhead->lock);
-       refcnt += inet_twsk_bind_unhash(tw, hashinfo);
+       inet_twsk_bind_unhash(tw, hashinfo);
        spin_unlock(&bhead->lock);
 
-       BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
-       atomic_sub(refcnt, &tw->tw_refcnt);
        atomic_dec(&tw->tw_dr->tw_count);
        inet_twsk_put(tw);
 }
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b4fd96de97e6..a237398aa2b4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -207,7 +207,6 @@ static int __inet6_check_established(struct 
inet_timewait_death_row *death_row,
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw = NULL;
-       int twrefcnt = 0;
 
        spin_lock(lock);
 
@@ -234,12 +233,10 @@ static int __inet6_check_established(struct 
inet_timewait_death_row *death_row,
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
        if (tw) {
-               twrefcnt = inet_twsk_unhash(tw);
+               sk_nulls_del_node_init_rcu((struct sock *)tw);
                NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
        }
        spin_unlock(lock);
-       if (twrefcnt)
-               inet_twsk_put(tw);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
        if (twp) {
@@ -247,7 +244,6 @@ static int __inet6_check_established(struct 
inet_timewait_death_row *death_row,
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
                inet_twsk_deschedule(tw);
-
                inet_twsk_put(tw);
        }
        return 0;
-- 
2.4.3.573.g4eafbef

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to