On Thu, Nov 30, 2017 at 9:39 AM, Paolo Abeni wrote:
> Since commit e32ea7e74727 ("soreuseport: fast reuseport UDP socket
> selection") and commit c125e80b8868 ("soreuseport: fast reuseport
> TCP socket selection") the relevant reuseport socket matching the current
> packet is selected by the reuseport_select_sock() call. The only
> exceptions are invalid BPF filters/filters returning out-of-range
> indices.
> In the latter case the code implicitly falls back to using the hash
> demultiplexing, but instead of selecting the socket inside the
> reuseport_select_sock() function, it relies on the hash selection
> logic introduced with the early soreuseport implementation.
>
> With this patch, in case of a BPF filter returning a bad socket
> index value, we fall back to hash-based selection inside the
> reuseport_select_sock() body, so that we can drop some duplicate
> code in the ipv4 and ipv6 stack.
>
> This also allows faster lookup in the above scenario and will allow
> us to avoid computing the hash value for successful, BPF based
> demultiplexing - in a later patch.
>
> Signed-off-by: Paolo Abeni
Great optimization!
Acked-by: Craig Gallek
> ---
> net/core/sock_reuseport.c | 4 +++-
> net/ipv4/inet_hashtables.c | 11 ++-
> net/ipv4/udp.c | 22 --
> net/ipv6/inet6_hashtables.c | 11 ++-
> net/ipv6/udp.c | 22 --
> 5 files changed, 15 insertions(+), 55 deletions(-)
>
> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> index 5eeb1d20cc38..c5bb52bc73a1 100644
> --- a/net/core/sock_reuseport.c
> +++ b/net/core/sock_reuseport.c
> @@ -235,7 +235,9 @@ struct sock *reuseport_select_sock(struct sock *sk,
>
> if (prog && skb)
> sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
> - else
> +
> + /* no bpf or invalid bpf result: fall back to hash usage */
> + if (!sk2)
> sk2 = reuse->socks[reciprocal_scale(hash, socks)];
> }
>
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index e7d15fb0d94d..427b705d7c64 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -216,32 +216,25 @@ struct sock *__inet_lookup_listener(struct net *net,
> {
> unsigned int hash = inet_lhashfn(net, hnum);
> struct inet_listen_hashbucket *ilb = >listening_hash[hash];
> - int score, hiscore = 0, matches = 0, reuseport = 0;
> bool exact_dif = inet_exact_dif_match(net, skb);
> struct sock *sk, *result = NULL;
> + int score, hiscore = 0;
> u32 phash = 0;
>
> sk_for_each_rcu(sk, >head) {
> score = compute_score(sk, net, hnum, daddr,
> dif, sdif, exact_dif);
> if (score > hiscore) {
> - reuseport = sk->sk_reuseport;
> - if (reuseport) {
> + if (sk->sk_reuseport) {
> phash = inet_ehashfn(net, daddr, hnum,
> saddr, sport);
> result = reuseport_select_sock(sk, phash,
>skb, doff);
> if (result)
> return result;
> - matches = 1;
> }
> result = sk;
> hiscore = score;
> - } else if (score == hiscore && reuseport) {
> - matches++;
> - if (reciprocal_scale(phash, matches) == 0)
> - result = sk;
> - phash = next_pseudo_random32(phash);
> }
> }
> return result;
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index e4ff25c947c5..36f857c87fe2 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -445,7 +445,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
> struct sk_buff *skb)
> {
> struct sock *sk, *result;
> - int score, badness, matches = 0, reuseport = 0;
> + int score, badness;
> u32 hash = 0;
>
> result = NULL;
> @@ -454,23 +454,16 @@ static struct sock *udp4_lib_lookup2(struct net *net,
> score = compute_score(sk, net, saddr, sport,
> daddr, hnum, dif, sdif, exact_dif);
> if (score > badness) {
> - reuseport = sk->sk_reuseport;
> - if (reuseport) {
> + if (sk->sk_reuseport) {
> hash = udp_ehashfn(net, daddr, hnum,
>saddr, sport);
>