[PATCH 4/4] nfs: deadlock prevention for NFS
Provide a proper a_ops-swapfile() implementation for NFS. This will set the NFS socket to SOCK_VMIO and put the socket reconnection under PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done). Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] --- fs/nfs/file.c | 21 - include/linux/sunrpc/xprt.h |4 +++- net/sunrpc/xprtsock.c | 16 3 files changed, 39 insertions(+), 2 deletions(-) Index: linux-2.6/fs/nfs/file.c === --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -27,6 +27,7 @@ #include linux/slab.h #include linux/pagemap.h #include linux/smp_lock.h +#include net/sock.h #include asm/uaccess.h #include asm/system.h @@ -317,7 +318,25 @@ static int nfs_release_page(struct page static int nfs_swapfile(struct address_space *mapping, int enable) { - return 0; + int err = -EINVAL; + struct rpc_clnt *client = NFS_CLIENT(mapping-host); + struct sock *sk = client-cl_xprt-inet; + + if (enable) { + client-cl_xprt-swapper = 1; + /* +* keep one extra sock reference so the reserve won't dip +* when the socket gets reconnected. +*/ + sk_adjust_memalloc(1, 1); + err = sk_set_vmio(sk); + } else if (client-cl_xprt-swapper) { + client-cl_xprt-swapper = 0; + sk_adjust_memalloc(-1, -1); + err = sk_clear_vmio(sk); + } + + return err; } const struct address_space_operations nfs_file_aops = { Index: linux-2.6/net/sunrpc/xprtsock.c === --- linux-2.6.orig/net/sunrpc/xprtsock.c +++ linux-2.6/net/sunrpc/xprtsock.c @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *) args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void * dprintk(RPC: xs_udp_connect_worker for xprt %p\n, xprt); + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + /* Start by resetting any existing state */ xs_close(xprt); @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void * xprt-sock = sock; xprt-inet = sk; + if (xprt-swapper) + sk_set_vmio(sk); + write_unlock_bh(sk-sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void * out: xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + current-flags = pflags; } /* @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) goto out; + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + dprintk(RPC: xs_tcp_connect_worker for xprt %p\n, xprt); if (!xprt-sock) { @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void * break; } } + + if (xprt-swapper) + sk_set_vmio(xprt-inet); out: xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); + current-flags = pflags; } /** Index: linux-2.6/include/linux/sunrpc/xprt.h === --- linux-2.6.orig/include/linux/sunrpc/xprt.h +++ linux-2.6/include/linux/sunrpc/xprt.h @@ -147,7 +147,9 @@ struct rpc_xprt { unsigned intmax_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ - resvport : 1; /* use a reserved port */ + resvport : 1, /* use a reserved port */ + swapper: 1; /* we're swapping over this + transport */ /* * XID - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] nfs: deadlock prevention for NFS
On Fri, 2006-08-25 at 16:14 -0400, Trond Myklebust wrote: Grumble... If your patches are targetting NFS, could you please at the very least Cc [EMAIL PROTECTED] and/or myself. Sorry, will make sure you're on the CC list next round. On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote: Provide a proper a_ops-swapfile() implementation for NFS. This will set the NFS socket to SOCK_VMIO and put the socket reconnection under PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done). Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] --- fs/nfs/file.c | 21 - include/linux/sunrpc/xprt.h |4 +++- net/sunrpc/xprtsock.c | 16 3 files changed, 39 insertions(+), 2 deletions(-) Index: linux-2.6/fs/nfs/file.c === --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -27,6 +27,7 @@ #include linux/slab.h #include linux/pagemap.h #include linux/smp_lock.h +#include net/sock.h #include asm/uaccess.h #include asm/system.h @@ -317,7 +318,25 @@ static int nfs_release_page(struct page static int nfs_swapfile(struct address_space *mapping, int enable) { - return 0; + int err = -EINVAL; + struct rpc_clnt *client = NFS_CLIENT(mapping-host); + struct sock *sk = client-cl_xprt-inet; + + if (enable) { + client-cl_xprt-swapper = 1; + /* +* keep one extra sock reference so the reserve won't dip +* when the socket gets reconnected. +*/ + sk_adjust_memalloc(1, 1); + err = sk_set_vmio(sk); + } else if (client-cl_xprt-swapper) { + client-cl_xprt-swapper = 0; + sk_adjust_memalloc(-1, -1); + err = sk_clear_vmio(sk); + } + + return err; } This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business screwing around with the internals of the sunrpc transport. Ok, I'll make a function there, and call that. const struct address_space_operations nfs_file_aops = { Index: linux-2.6/net/sunrpc/xprtsock.c === --- linux-2.6.orig/net/sunrpc/xprtsock.c +++ linux-2.6/net/sunrpc/xprtsock.c @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *) args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void * dprintk(RPC: xs_udp_connect_worker for xprt %p\n, xprt); + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + /* Start by resetting any existing state */ xs_close(xprt); @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void * xprt-sock = sock; xprt-inet = sk; + if (xprt-swapper) + sk_set_vmio(sk); + write_unlock_bh(sk-sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void * out: xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + current-flags = pflags; } /* @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) goto out; + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + dprintk(RPC: xs_tcp_connect_worker for xprt %p\n, xprt); if (!xprt-sock) { @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void * break; } } + + if (xprt-swapper) + sk_set_vmio(xprt-inet); out: xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); + current-flags = pflags; } How does this guarantee that the socket reconnection won't fail? I was afraid this might not be enough, I really have to go through the network code. Also, what about the case of rpc_malloc()? Can't that cause rpciod to deadlock when you add NFS swap into the equation? I will have to plead ignorance for now, I'll look into this on monday. On first glance it looks like rpc_malloc could use an |__GFP_EMERG for RPC_TASK_SWAPPER. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] nfs: deadlock prevention for NFS
Grumble... If your patches are targetting NFS, could you please at the very least Cc [EMAIL PROTECTED] and/or myself. On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote: Provide a proper a_ops-swapfile() implementation for NFS. This will set the NFS socket to SOCK_VMIO and put the socket reconnection under PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done). Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] --- fs/nfs/file.c | 21 - include/linux/sunrpc/xprt.h |4 +++- net/sunrpc/xprtsock.c | 16 3 files changed, 39 insertions(+), 2 deletions(-) Index: linux-2.6/fs/nfs/file.c === --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -27,6 +27,7 @@ #include linux/slab.h #include linux/pagemap.h #include linux/smp_lock.h +#include net/sock.h #include asm/uaccess.h #include asm/system.h @@ -317,7 +318,25 @@ static int nfs_release_page(struct page static int nfs_swapfile(struct address_space *mapping, int enable) { - return 0; + int err = -EINVAL; + struct rpc_clnt *client = NFS_CLIENT(mapping-host); + struct sock *sk = client-cl_xprt-inet; + + if (enable) { + client-cl_xprt-swapper = 1; + /* + * keep one extra sock reference so the reserve won't dip + * when the socket gets reconnected. + */ + sk_adjust_memalloc(1, 1); + err = sk_set_vmio(sk); + } else if (client-cl_xprt-swapper) { + client-cl_xprt-swapper = 0; + sk_adjust_memalloc(-1, -1); + err = sk_clear_vmio(sk); + } + + return err; } This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business screwing around with the internals of the sunrpc transport. const struct address_space_operations nfs_file_aops = { Index: linux-2.6/net/sunrpc/xprtsock.c === --- linux-2.6.orig/net/sunrpc/xprtsock.c +++ linux-2.6/net/sunrpc/xprtsock.c @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *) args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void * dprintk(RPC: xs_udp_connect_worker for xprt %p\n, xprt); + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + /* Start by resetting any existing state */ xs_close(xprt); @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void * xprt-sock = sock; xprt-inet = sk; + if (xprt-swapper) + sk_set_vmio(sk); + write_unlock_bh(sk-sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void * out: xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + current-flags = pflags; } /* @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) goto out; + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + dprintk(RPC: xs_tcp_connect_worker for xprt %p\n, xprt); if (!xprt-sock) { @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void * break; } } + + if (xprt-swapper) + sk_set_vmio(xprt-inet); out: xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); + current-flags = pflags; } How does this guarantee that the socket reconnection won't fail? Also, what about the case of rpc_malloc()? Can't that cause rpciod to deadlock when you add NFS swap into the equation? Cheers, Trond - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] nfs: deadlock prevention for NFS
/* Check if this were the first socks: */ if (nr_socks - socks == 0) reserve += RX_RESERVE_PAGES; Can of course be: if (nr_socks == socks) reserve += RX_RESERVE_PAGES; Grumble, Indan - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html