On Mon, Mar 17, 2025 at 10:52:25AM +0100, Michal Luczaj wrote:
Signal delivery during connect() may lead to a disconnect of an already
established socket. That involves removing socket from any sockmap and
resetting state to SS_UNCONNECTED. While it correctly restores socket's
proto, a call to vsock_bpf_recvmsg() might have been already under way in
another thread. If the connect()ing thread reassigns the vsock transport to
NULL, the recvmsg()ing thread may trigger a WARN_ON_ONCE.

connect
 / state = SS_CONNECTED /
                               sock_map_update_elem
                               vsock_bpf_recvmsg
                                 psock = sk_psock_get()
 lock sk
 if signal_pending
   unhash
     sock_map_remove_links
   state = SS_UNCONNECTED
 release sk

connect
 transport = NULL
                                 lock sk
                                 WARN_ON_ONCE(!vsk->transport)

Protect recvmsg() from racing against transport reassignment. Enforce the
sockmap invariant that psock implies transport: lock socket before getting
psock.

WARNING: CPU: 9 PID: 1222 at net/vmw_vsock/vsock_bpf.c:92 
vsock_bpf_recvmsg+0xb55/0xe00
CPU: 9 UID: 0 PID: 1222 Comm: a.out Not tainted 6.14.0-rc5+
RIP: 0010:vsock_bpf_recvmsg+0xb55/0xe00
sock_recvmsg+0x1b2/0x220
__sys_recvfrom+0x190/0x270
__x64_sys_recvfrom+0xdc/0x1b0
do_syscall_64+0x93/0x1b0
entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: 634f1a7110b4 ("vsock: support sockmap")
Signed-off-by: Michal Luczaj <m...@rbox.co>
---
net/vmw_vsock/vsock_bpf.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index 
c68fdaf09046b68254dac3ea70ffbe73dfa45cef..5138195d91fb258d4bc09b48e80e13651d62863a
 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -73,28 +73,35 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr 
*msg, size_t len, int
        return err;
}

-static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
-                            size_t len, int flags, int *addr_len)
+static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+                            int flags, int *addr_len)

I would avoid this change, especially in a patch with the Fixes tag then to be backported.

{
        struct sk_psock *psock;
        struct vsock_sock *vsk;
        int copied;

+       /* Since signal delivery during connect() may reset the state of socket
+        * that's already in a sockmap, take the lock before checking on psock.
+        * This serializes a possible transport reassignment, protecting this
+        * function from running with NULL transport.
+        */
+       lock_sock(sk);
+
        psock = sk_psock_get(sk);
-       if (unlikely(!psock))
+       if (unlikely(!psock)) {
+               release_sock(sk);
                return __vsock_recvmsg(sk, msg, len, flags);
+       }

-       lock_sock(sk);
        vsk = vsock_sk(sk);
-
        if (WARN_ON_ONCE(!vsk->transport)) {
                copied = -ENODEV;
                goto out;
        }

        if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
-               release_sock(sk);
                sk_psock_put(sk, psock);
+               release_sock(sk);

But here we release it, so can still a reset happen at this point, before calling __vsock_connectible_recvmsg(). In there anyway we handle the case where transport is null, so there's no problem, right?

The rest LTGM.

Thanks,
Stefano

                return __vsock_recvmsg(sk, msg, len, flags);
        }

@@ -108,8 +115,8 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr 
*msg,
                }

                if (sk_psock_queue_empty(psock)) {
-                       release_sock(sk);
                        sk_psock_put(sk, psock);
+                       release_sock(sk);
                        return __vsock_recvmsg(sk, msg, len, flags);
                }

@@ -117,8 +124,8 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr 
*msg,
        }

out:
-       release_sock(sk);
        sk_psock_put(sk, psock);
+       release_sock(sk);

        return copied;
}

--
2.48.1



Reply via email to