Everything should now be in place to finally allow processing of SYN
packets without holding the listener lock.
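
The idea, as the diff below implements it: when the lookup yields a
TCP_LISTEN socket, hand the skb to tcp_v4_do_rcv() directly, still
under rcu_read_lock(), instead of going through bh_lock_sock_nested():

	if (sk->sk_state == TCP_LISTEN) {
		/* lockless: no bh_lock_sock(), RCU protects the listener */
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}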

Tested:

3.5 Mpps SYN flood. Plenty of CPU cycles still available.

The next bottleneck is the refcount taken on the listener, which could
be avoided if we dropped the strict SLAB_DESTROY_BY_RCU semantics for
listeners and used regular RCU instead.
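
To illustrate the difference: under SLAB_DESTROY_BY_RCU the lookup has
to pin the socket and then re-validate the keys, because the slab object
can be recycled for another socket while we look at it. With regular RCU
freeing, rcu_read_lock() alone would keep the listener alive. A rough
sketch, mirroring the refcount dance in net/ipv4/inet_hashtables.c; not
part of this patch:

	/* SLAB_DESTROY_BY_RCU: pin the socket, then re-check the match */
	if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
		goto begin;	/* socket freed under us, restart lookup */
	if (unlikely(!INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) {
		sock_put(sk);	/* slab object recycled, restart lookup */
		goto begin;
	}

	/* regular RCU: no sock_hold()/sock_put() pair would be needed */
	rcu_read_lock();
	sk = __inet_lookup_listener(net, ...);
	if (sk)
		ret = tcp_v4_do_rcv(sk, skb);
	rcu_read_unlock();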

    13.18%  [kernel]  [k] __inet_lookup_listener
     9.61%  [kernel]  [k] tcp_conn_request
     8.16%  [kernel]  [k] sha_transform
     5.30%  [kernel]  [k] inet_reqsk_alloc
     4.22%  [kernel]  [k] sock_put
     3.74%  [kernel]  [k] tcp_make_synack
     2.88%  [kernel]  [k] ipt_do_table
     2.56%  [kernel]  [k] memcpy_erms
     2.53%  [kernel]  [k] sock_wfree
     2.40%  [kernel]  [k] tcp_v4_rcv
     2.08%  [kernel]  [k] fib_table_lookup
     1.84%  [kernel]  [k] tcp_openreq_init_rwin

Signed-off-by: Eric Dumazet <eduma...@google.com>
---
 net/ipv4/tcp_ipv4.c | 11 +++++++++--
 net/ipv6/tcp_ipv6.c | 11 +++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac2ea73e9aaf..34310748a365 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1355,7 +1355,7 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
 }
 
 /* The socket must have it's spinlock held when we get
- * here.
+ * here, unless it is a TCP_LISTEN socket.
  *
  * We have a potential double-lock case here, so even when
  * doing backlog processing we use the BH locking scheme.
@@ -1619,9 +1619,15 @@ process:
        if (sk_filter(sk, skb))
                goto discard_and_relse;
 
-       sk_incoming_cpu_update(sk);
        skb->dev = NULL;
 
+       if (sk->sk_state == TCP_LISTEN) {
+               ret = tcp_v4_do_rcv(sk, skb);
+               goto put_and_return;
+       }
+
+       sk_incoming_cpu_update(sk);
+
        bh_lock_sock_nested(sk);
        tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
        ret = 0;
@@ -1636,6 +1642,7 @@ process:
        }
        bh_unlock_sock(sk);
 
+put_and_return:
        sock_put(sk);
 
        return ret;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3d18571811c5..33334f0c217d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1161,7 +1161,7 @@ out:
 }
 
 /* The socket must have it's spinlock held when we get
- * here.
+ * here, unless it is a TCP_LISTEN socket.
  *
  * We have a potential double-lock case here, so even when
  * doing backlog processing we use the BH locking scheme.
@@ -1415,9 +1415,15 @@ process:
        if (sk_filter(sk, skb))
                goto discard_and_relse;
 
-       sk_incoming_cpu_update(sk);
        skb->dev = NULL;
 
+       if (sk->sk_state == TCP_LISTEN) {
+               ret = tcp_v6_do_rcv(sk, skb);
+               goto put_and_return;
+       }
+
+       sk_incoming_cpu_update(sk);
+
        bh_lock_sock_nested(sk);
        tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
        ret = 0;
@@ -1432,6 +1438,7 @@ process:
        }
        bh_unlock_sock(sk);
 
+put_and_return:
        sock_put(sk);
        return ret ? -1 : 0;
 
-- 
2.6.0.rc2.230.g3dd15c0
