Many socket operations can copy data between user and kernel space while the socket lock is held. This means mm->mmap_sem can be taken after the socket lock.
When implementing tcp mmap(), I forgot this and syzbot was kind enough to bring this to my attention. This patch adds tcp_mmap_hook(), allowing us to grab socket lock before vm_mmap_pgoff() grabs mm->mmap_sem. This same hook is responsible for releasing socket lock when vm_mmap_pgoff() has released mm->mmap_sem (or failed to acquire it). Note that follow-up patches can transfer code from tcp_mmap() to tcp_mmap_hook() to shorten tcp_mmap() execution time and thus increase mmap() performance in multi-threaded programs. Fixes: 93ab6cc69162 ("tcp: implement mmap() for zero copy receive") Signed-off-by: Eric Dumazet <eduma...@google.com> Reported-by: syzbot <syzkal...@googlegroups.com> --- include/net/tcp.h | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 25 ++++++++++++++++++++++--- net/ipv6/af_inet6.c | 1 + 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 833154e3df173ea41aa16dd1ec739a175c679c5c..f68c8e8957840cacdbdd3d02bd149fce33ae324f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -404,6 +404,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); void tcp_data_ready(struct sock *sk); +int tcp_mmap_hook(struct socket *sock, enum mmap_hook mode); int tcp_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); void tcp_parse_options(const struct net *net, const struct sk_buff *skb, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3ebf599cebaea4926decc1aad7274b12ec7e1566..af597440ff59c049b7fd02f7d7f79c23b9e195bb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -995,6 +995,7 @@ const struct proto_ops inet_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = tcp_mmap, + .mmap_hook = tcp_mmap_hook, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 
4022073b0aeea9d07af0fa825b640a00512908a3..e913b2dd5df321f2789e8d5f233ede9c2f1d5624 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1726,6 +1726,28 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_set_rcvlowat); +/* mmap() on TCP needs to grab socket lock before current->mm->mmap_sem + * is taken in vm_mmap_pgoff() to avoid possible dead locks. + */ +int tcp_mmap_hook(struct socket *sock, enum mmap_hook mode) +{ + struct sock *sk = sock->sk; + + if (mode == MMAP_HOOK_PREPARE) { + lock_sock(sk); + /* TODO: Move here all the preparation work that can be done + * before having to grab current->mm->mmap_sem. + */ + return 0; + } + /* TODO: Move here the stuff that can been done after + * current->mm->mmap_sem has been released. + */ + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_mmap_hook); + /* When user wants to mmap X pages, we first need to perform the mapping * before freeing any skbs in receive queue, otherwise user would be unable * to fallback to standard recvmsg(). This happens if some data in the @@ -1756,8 +1778,6 @@ int tcp_mmap(struct file *file, struct socket *sock, /* TODO: Maybe the following is not needed if pages are COW */ vma->vm_flags &= ~VM_MAYWRITE; - lock_sock(sk); - ret = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; @@ -1833,7 +1853,6 @@ int tcp_mmap(struct file *file, struct socket *sock, ret = 0; out: - release_sock(sk); kvfree(pages_array); return ret; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 36d622c477b1ed3c5d2b753938444526344a6109..31ce68c001c223d3351f73453273ae517a051816 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -579,6 +579,7 @@ const struct proto_ops inet6_stream_ops = { .sendmsg = inet_sendmsg, /* ok */ .recvmsg = inet_recvmsg, /* ok */ .mmap = tcp_mmap, + .mmap_hook = tcp_mmap_hook, .sendpage = inet_sendpage, .sendmsg_locked = tcp_sendmsg_locked, .sendpage_locked = tcp_sendpage_locked, -- 2.17.0.484.g0c8726318c-goog