3.16.60-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: Willem de Bruijn <will...@google.com>

commit a6361f0ca4b25460f2cdf3235ebe8115f622901e upstream.

Updates to the bitfields in struct packet_sock are not atomic.
Serialize these read-modify-write cycles.

Move po->running into a separate variable. Its writes are protected by
po->bind_lock (except for one startup case at packet_create). Also
replace a textual precondition warning with lockdep annotation.

All others are set only in packet_setsockopt. Serialize these
updates by holding the socket lock. Analogous to other field updates,
also hold the lock when testing whether a ring is active (pg_vec).

Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg")
Reported-by: DaeRyong Jeong <threeear...@gmail.com>
Reported-by: Byoungyoung Lee <byoungyo...@purdue.edu>
Signed-off-by: Willem de Bruijn <will...@google.com>
Signed-off-by: David S. Miller <da...@davemloft.net>
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings <b...@decadent.org.uk>
---
 net/packet/af_packet.c | 60 +++++++++++++++++++++++++++++++-----------
 net/packet/internal.h  | 10 +++----
 2 files changed, 49 insertions(+), 21 deletions(-)

--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -343,11 +343,11 @@ static void packet_pick_tx_queue(struct
        skb_set_queue_mapping(skb, queue_index);
 }
 
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
  * or from a context in which asynchronous accesses to the packet
  * socket is not possible (packet_create()).
  */
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
 {
        struct packet_sock *po = pkt_sk(sk);
 
@@ -362,8 +362,13 @@ static void register_prot_hook(struct so
        }
 }
 
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held.   If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+       lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+       __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
  * the po->bind_lock and do a synchronize_net to make sure no
  * asynchronous packet processing paths still refer to the elements
  * of po->prot_hook.  If the sync parameter is false, it is the
@@ -373,6 +378,8 @@ static void __unregister_prot_hook(struc
 {
        struct packet_sock *po = pkt_sk(sk);
 
+       lockdep_assert_held_once(&po->bind_lock);
+
        po->running = 0;
 
        if (po->fanout)
@@ -2887,7 +2894,7 @@ static int packet_create(struct net *net
 
        if (proto) {
                po->prot_hook.type = proto;
-               register_prot_hook(sk);
+               __register_prot_hook(sk);
        }
 
        mutex_lock(&net->packet.sklist_lock);
@@ -3386,12 +3393,18 @@ packet_setsockopt(struct socket *sock, i
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_loss = !!val;
-               return 0;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_loss = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_AUXDATA:
        {
@@ -3402,7 +3415,9 @@ packet_setsockopt(struct socket *sock, i
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->auxdata = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_ORIGDEV:
@@ -3414,7 +3429,9 @@ packet_setsockopt(struct socket *sock, i
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->origdev = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_VNET_HDR:
@@ -3423,15 +3440,20 @@ packet_setsockopt(struct socket *sock, i
 
                if (sock->type != SOCK_RAW)
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
-               po->has_vnet_hdr = !!val;
-               return 0;
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->has_vnet_hdr = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_TIMESTAMP:
        {
@@ -3462,11 +3484,17 @@ packet_setsockopt(struct socket *sock, i
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_tx_has_off = !!val;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_tx_has_off = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
                return 0;
        }
        case PACKET_QDISC_BYPASS:
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -100,10 +100,12 @@ struct packet_sock {
        int                     copy_thresh;
        spinlock_t              bind_lock;
        struct mutex            pg_vec_lock;
-       unsigned int            running:1,      /* prot_hook is attached*/
-                               auxdata:1,
+       unsigned int            running;        /* bind_lock must be held */
+       unsigned int            auxdata:1,      /* writer must hold sock lock */
                                origdev:1,
-                               has_vnet_hdr:1;
+                               has_vnet_hdr:1,
+                               tp_loss:1,
+                               tp_tx_has_off:1;
        int                     ifindex;        /* bound device         */
        __be16                  num;
        struct packet_mclist    *mclist;
@@ -111,8 +113,6 @@ struct packet_sock {
        enum tpacket_versions   tp_version;
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
-       unsigned int            tp_loss:1;
-       unsigned int            tp_tx_has_off:1;
        unsigned int            tp_tstamp;
        struct net_device __rcu *cached_dev;
        int                     (*xmit)(struct sk_buff *skb);

Reply via email to