3.8.13.15 -stable review patch. If anyone has any objections, please let me know.
------------------ From: Daniel Borkmann <dbork...@redhat.com> [ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ] Salam reported a use after free bug in PF_PACKET that occurs when we're sending out frames on a socket bound device and suddenly the net device is being unregistered. It appears that commit 827d9780 introduced a possible race condition between {t,}packet_snd() and packet_notifier(). In the case of a bound socket, packet_notifier() can drop the last reference to the net_device and {t,}packet_snd() might end up suddenly sending a packet over a freed net_device. To avoid reverting 827d9780 and thus introducing a performance regression compared to the current state of things, we decided to hold a cached RCU protected pointer to the net device and maintain it on write side via bind spin_lock protected register_prot_hook() and __unregister_prot_hook() calls. In {t,}packet_snd() path, we access this pointer under rcu_read_lock through packet_cached_dev_get() that holds reference to the device to prevent it from being freed through packet_notifier() while we're in send path. This is okay to do as dev_put()/dev_hold() are per-cpu counters, so this should not be a performance issue. Also, the code simplifies a bit as we don't need need_rls_dev anymore. Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.") Reported-by: Salam Noureddine <nouredd...@aristanetworks.com> Signed-off-by: Daniel Borkmann <dbork...@redhat.com> Signed-off-by: Salam Noureddine <nouredd...@aristanetworks.com> Cc: Ben Greear <gree...@candelatech.com> Cc: Eric Dumazet <eric.duma...@gmail.com> Signed-off-by: David S. Miller <da...@davemloft.net> Signed-off-by: Kamal Mostafa <ka...@canonical.com> --- net/packet/af_packet.c | 59 ++++++++++++++++++++++++++++++-------------------- net/packet/internal.h | 1 + 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 517d154..2d5b24a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -236,11 +236,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -258,10 +262,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -1960,12 +1967,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -1978,7 +1997,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -1992,19 +2011,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2081,8 +2098,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2120,7 +2136,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2136,7 +2151,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2148,19 +2163,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2293,15 +2306,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2521,6 +2533,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8..821dfee 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -111,6 +111,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/