The commit is pushed to "branch-rh7-3.10.0-1160.6.1.vz7.171.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.6.1.vz7.171.1
------>
commit 54b1fa63ec25f8831894436420966fe91f90abc0
Author: Willem de Bruijn <will...@google.com>
Date:   Thu Dec 3 12:31:32 2020 +0300

    ms/packet: fix bitfield update race
    
    Updates to the bitfields in struct packet_sock are not atomic.
    Serialize these read-modify-write cycles.
    
    Move po->running into a separate variable. Its writes are protected by
    po->bind_lock (except for one startup case at packet_create). Also
    replace a textual precondition warning with lockdep annotation.
    
    All others are set only in packet_setsockopt. Serialize these
    updates by holding the socket lock. Analogous to other field updates,
    also hold the lock when testing whether a ring is active (pg_vec).
    
    Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for 
recvmsg")
    Reported-by: DaeRyong Jeong <threeear...@gmail.com>
    Reported-by: Byoungyoung Lee <byoungyo...@purdue.edu>
    Signed-off-by: Willem de Bruijn <will...@google.com>
    Signed-off-by: David S. Miller <da...@davemloft.net>
    
    (cherry-picked from commit a6361f0ca4b25460f2cdf3235ebe8115f622901e)
    
https://syzkaller.appspot.com/bug?id=8acde8a70c4bca0d0c39e975409c1008663c93fd
    https://jira.sw.ru/browse/PSBM-123046
    Signed-off-by: Vasily Averin <v...@virtuozzo.com>
---
 net/packet/af_packet.c | 60 ++++++++++++++++++++++++++++++++++++--------------
 net/packet/internal.h  | 10 ++++-----
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 112c856..b71157c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -261,11 +261,11 @@ static void packet_cached_dev_reset(struct packet_sock 
*po)
        RCU_INIT_POINTER(po->cached_dev, NULL);
 }
 
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
  * or from a context in which asynchronous accesses to the packet
  * socket is not possible (packet_create()).
  */
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
 {
        struct packet_sock *po = pkt_sk(sk);
 
@@ -280,8 +280,13 @@ static void register_prot_hook(struct sock *sk)
        }
 }
 
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held.   If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+       lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+       __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
  * the po->bind_lock and do a synchronize_net to make sure no
  * asynchronous packet processing paths still refer to the elements
  * of po->prot_hook.  If the sync parameter is false, it is the
@@ -291,6 +296,8 @@ static void __unregister_prot_hook(struct sock *sk, bool 
sync)
 {
        struct packet_sock *po = pkt_sk(sk);
 
+       lockdep_assert_held_once(&po->bind_lock);
+
        po->running = 0;
 
        if (po->fanout)
@@ -2897,7 +2904,7 @@ static int packet_create(struct net *net, struct socket 
*sock, int protocol,
 
        if (proto) {
                po->prot_hook.type = proto;
-               register_prot_hook(sk);
+               __register_prot_hook(sk);
        }
 
        mutex_lock(&net->packet.sklist_lock);
@@ -3440,12 +3447,18 @@ packet_setsockopt(struct socket *sock, int level, int 
optname, char __user *optv
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_loss = !!val;
-               return 0;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_loss = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_AUXDATA:
        {
@@ -3456,7 +3469,9 @@ packet_setsockopt(struct socket *sock, int level, int 
optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->auxdata = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_ORIGDEV:
@@ -3468,7 +3483,9 @@ packet_setsockopt(struct socket *sock, int level, int 
optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->origdev = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_VNET_HDR:
@@ -3477,15 +3494,20 @@ packet_setsockopt(struct socket *sock, int level, int 
optname, char __user *optv
 
                if (sock->type != SOCK_RAW)
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
-               po->has_vnet_hdr = !!val;
-               return 0;
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->has_vnet_hdr = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_TIMESTAMP:
        {
@@ -3516,11 +3538,17 @@ packet_setsockopt(struct socket *sock, int level, int 
optname, char __user *optv
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_tx_has_off = !!val;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_tx_has_off = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
                return 0;
        }
        default:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 0ecefd4..9408dbf 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -98,10 +98,12 @@ struct packet_sock {
        int                     copy_thresh;
        spinlock_t              bind_lock;
        struct mutex            pg_vec_lock;
-       unsigned int            running:1,      /* prot_hook is attached*/
-                               auxdata:1,
+       unsigned int            running;        /* bind_lock must be held */
+       unsigned int            auxdata:1,      /* writer must hold sock lock */
                                origdev:1,
-                               has_vnet_hdr:1;
+                               has_vnet_hdr:1,
+                               tp_loss:1,
+                               tp_tx_has_off:1;
        int                     ifindex;        /* bound device         */
        __be16                  num;
        struct packet_mclist    *mclist;
@@ -109,8 +111,6 @@ struct packet_sock {
        enum tpacket_versions   tp_version;
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
-       unsigned int            tp_loss:1;
-       unsigned int            tp_tx_has_off:1;
        unsigned int            tp_tstamp;
        struct net_device __rcu *cached_dev;
        struct packet_type      prot_hook ____cacheline_aligned_in_smp;
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to