[PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Peter Zijlstra

Provide a proper a_ops-swapfile() implementation for NFS. This will
set the NFS socket to SOCK_VMIO and put the socket reconnection under
PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done).

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
---
 fs/nfs/file.c   |   21 -
 include/linux/sunrpc/xprt.h |4 +++-
 net/sunrpc/xprtsock.c   |   16 
 3 files changed, 39 insertions(+), 2 deletions(-)

Index: linux-2.6/fs/nfs/file.c
===
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -27,6 +27,7 @@
 #include linux/slab.h
 #include linux/pagemap.h
 #include linux/smp_lock.h
+#include net/sock.h
 
 #include asm/uaccess.h
 #include asm/system.h
@@ -317,7 +318,25 @@ static int nfs_release_page(struct page 
 
 static int nfs_swapfile(struct address_space *mapping, int enable)
 {
-   return 0;
+   int err = -EINVAL;
+   struct rpc_clnt *client = NFS_CLIENT(mapping-host);
+   struct sock *sk = client-cl_xprt-inet;
+
+   if (enable) {
+   client-cl_xprt-swapper = 1;
+   /*
+* keep one extra sock reference so the reserve won't dip
+* when the socket gets reconnected.
+*/
+   sk_adjust_memalloc(1, 1);
+   err = sk_set_vmio(sk);
+   } else if (client-cl_xprt-swapper) {
+   client-cl_xprt-swapper = 0;
+   sk_adjust_memalloc(-1, -1);
+   err = sk_clear_vmio(sk);
+   }
+
+   return err;
 }
 
 const struct address_space_operations nfs_file_aops = {
Index: linux-2.6/net/sunrpc/xprtsock.c
===
--- linux-2.6.orig/net/sunrpc/xprtsock.c
+++ linux-2.6/net/sunrpc/xprtsock.c
@@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
 {
struct rpc_xprt *xprt = (struct rpc_xprt *) args;
struct socket *sock = xprt-sock;
+   unsigned long pflags = current-flags;
int err, status = -EIO;
 
if (xprt-shutdown || xprt-addr.sin_port == 0)
@@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
 
dprintk(RPC:  xs_udp_connect_worker for xprt %p\n, xprt);
 
+   if (xprt-swapper)
+   current-flags |= PF_MEMALLOC;
+
/* Start by resetting any existing state */
xs_close(xprt);
 
@@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
xprt-sock = sock;
xprt-inet = sk;
 
+   if (xprt-swapper)
+   sk_set_vmio(sk);
+
write_unlock_bh(sk-sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
 out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+   current-flags = pflags;
 }
 
 /*
@@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
 {
struct rpc_xprt *xprt = (struct rpc_xprt *)args;
struct socket *sock = xprt-sock;
+   unsigned long pflags = current-flags;
int err, status = -EIO;
 
if (xprt-shutdown || xprt-addr.sin_port == 0)
goto out;
 
+   if (xprt-swapper)
+   current-flags |= PF_MEMALLOC;
+
dprintk(RPC:  xs_tcp_connect_worker for xprt %p\n, xprt);
 
if (!xprt-sock) {
@@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void *
break;
}
}
+
+   if (xprt-swapper)
+   sk_set_vmio(xprt-inet);
 out:
xprt_wake_pending_tasks(xprt, status);
 out_clear:
xprt_clear_connecting(xprt);
+   current-flags = pflags;
 }
 
 /**
Index: linux-2.6/include/linux/sunrpc/xprt.h
===
--- linux-2.6.orig/include/linux/sunrpc/xprt.h
+++ linux-2.6/include/linux/sunrpc/xprt.h
@@ -147,7 +147,9 @@ struct rpc_xprt {
unsigned intmax_reqs;   /* total slots */
unsigned long   state;  /* transport state */
unsigned char   shutdown   : 1, /* being shut down */
-   resvport   : 1; /* use a reserved port */
+   resvport   : 1, /* use a reserved port */
+   swapper: 1; /* we're swapping over this
+  transport */
 
/*
 * XID
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Peter Zijlstra
On Fri, 2006-08-25 at 16:14 -0400, Trond Myklebust wrote:
 Grumble... If your patches are targetting NFS, could you please at the
 very least Cc [EMAIL PROTECTED] and/or myself.

Sorry, will make sure you're on the CC list next round.

 On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote:
  Provide a proper a_ops-swapfile() implementation for NFS. This will
  set the NFS socket to SOCK_VMIO and put the socket reconnection under
  PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done).
  
  Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
  ---
   fs/nfs/file.c   |   21 -
   include/linux/sunrpc/xprt.h |4 +++-
   net/sunrpc/xprtsock.c   |   16 
   3 files changed, 39 insertions(+), 2 deletions(-)
  
  Index: linux-2.6/fs/nfs/file.c
  ===
  --- linux-2.6.orig/fs/nfs/file.c
  +++ linux-2.6/fs/nfs/file.c
  @@ -27,6 +27,7 @@
   #include linux/slab.h
   #include linux/pagemap.h
   #include linux/smp_lock.h
  +#include net/sock.h
   
   #include asm/uaccess.h
   #include asm/system.h
  @@ -317,7 +318,25 @@ static int nfs_release_page(struct page 
   
   static int nfs_swapfile(struct address_space *mapping, int enable)
   {
  -   return 0;
  +   int err = -EINVAL;
  +   struct rpc_clnt *client = NFS_CLIENT(mapping-host);
  +   struct sock *sk = client-cl_xprt-inet;
  +
  +   if (enable) {
  +   client-cl_xprt-swapper = 1;
  +   /*
  +* keep one extra sock reference so the reserve won't dip
  +* when the socket gets reconnected.
  +*/
  +   sk_adjust_memalloc(1, 1);
  +   err = sk_set_vmio(sk);
  +   } else if (client-cl_xprt-swapper) {
  +   client-cl_xprt-swapper = 0;
  +   sk_adjust_memalloc(-1, -1);
  +   err = sk_clear_vmio(sk);
  +   }
  +
  +   return err;
   }
 
 This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business
 screwing around with the internals of the sunrpc transport.

Ok, I'll make a function there, and call that.

   const struct address_space_operations nfs_file_aops = {
  Index: linux-2.6/net/sunrpc/xprtsock.c
  ===
  --- linux-2.6.orig/net/sunrpc/xprtsock.c
  +++ linux-2.6/net/sunrpc/xprtsock.c
  @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
   {
  struct rpc_xprt *xprt = (struct rpc_xprt *) args;
  struct socket *sock = xprt-sock;
  +   unsigned long pflags = current-flags;
  int err, status = -EIO;
   
  if (xprt-shutdown || xprt-addr.sin_port == 0)
  @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
   
  dprintk(RPC:  xs_udp_connect_worker for xprt %p\n, xprt);
   
  +   if (xprt-swapper)
  +   current-flags |= PF_MEMALLOC;
  +
  /* Start by resetting any existing state */
  xs_close(xprt);
   
  @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
  xprt-sock = sock;
  xprt-inet = sk;
   
  +   if (xprt-swapper)
  +   sk_set_vmio(sk);
  +
  write_unlock_bh(sk-sk_callback_lock);
  }
  xs_udp_do_set_buffer_size(xprt);
  @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
   out:
  xprt_wake_pending_tasks(xprt, status);
  xprt_clear_connecting(xprt);
  +   current-flags = pflags;
   }
   
   /*
  @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
   {
  struct rpc_xprt *xprt = (struct rpc_xprt *)args;
  struct socket *sock = xprt-sock;
  +   unsigned long pflags = current-flags;
  int err, status = -EIO;
   
  if (xprt-shutdown || xprt-addr.sin_port == 0)
  goto out;
   
  +   if (xprt-swapper)
  +   current-flags |= PF_MEMALLOC;
  +
  dprintk(RPC:  xs_tcp_connect_worker for xprt %p\n, xprt);
   
  if (!xprt-sock) {
  @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void *
  break;
  }
  }
  +
  +   if (xprt-swapper)
  +   sk_set_vmio(xprt-inet);
   out:
  xprt_wake_pending_tasks(xprt, status);
   out_clear:
  xprt_clear_connecting(xprt);
  +   current-flags = pflags;
   }
 
 How does this guarantee that the socket reconnection won't fail?

I was afraid this might not be enough, I really have to go through the
network code.

 Also, what about the case of rpc_malloc()? Can't that cause rpciod to
 deadlock when you add NFS swap into the equation?

I will have to plead ignorance for now, I'll look into this on monday.
On first glance it looks like rpc_malloc could use an |__GFP_EMERG for
RPC_TASK_SWAPPER.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Trond Myklebust
Grumble... If your patches are targetting NFS, could you please at the
very least Cc [EMAIL PROTECTED] and/or myself.


On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote:
 Provide a proper a_ops-swapfile() implementation for NFS. This will
 set the NFS socket to SOCK_VMIO and put the socket reconnection under
 PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done).
 
 Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
 ---
  fs/nfs/file.c   |   21 -
  include/linux/sunrpc/xprt.h |4 +++-
  net/sunrpc/xprtsock.c   |   16 
  3 files changed, 39 insertions(+), 2 deletions(-)
 
 Index: linux-2.6/fs/nfs/file.c
 ===
 --- linux-2.6.orig/fs/nfs/file.c
 +++ linux-2.6/fs/nfs/file.c
 @@ -27,6 +27,7 @@
  #include linux/slab.h
  #include linux/pagemap.h
  #include linux/smp_lock.h
 +#include net/sock.h
  
  #include asm/uaccess.h
  #include asm/system.h
 @@ -317,7 +318,25 @@ static int nfs_release_page(struct page 
  
  static int nfs_swapfile(struct address_space *mapping, int enable)
  {
 - return 0;
 + int err = -EINVAL;
 + struct rpc_clnt *client = NFS_CLIENT(mapping-host);
 + struct sock *sk = client-cl_xprt-inet;
 +
 + if (enable) {
 + client-cl_xprt-swapper = 1;
 + /*
 +  * keep one extra sock reference so the reserve won't dip
 +  * when the socket gets reconnected.
 +  */
 + sk_adjust_memalloc(1, 1);
 + err = sk_set_vmio(sk);
 + } else if (client-cl_xprt-swapper) {
 + client-cl_xprt-swapper = 0;
 + sk_adjust_memalloc(-1, -1);
 + err = sk_clear_vmio(sk);
 + }
 +
 + return err;
  }

This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business
screwing around with the internals of the sunrpc transport.

  const struct address_space_operations nfs_file_aops = {
 Index: linux-2.6/net/sunrpc/xprtsock.c
 ===
 --- linux-2.6.orig/net/sunrpc/xprtsock.c
 +++ linux-2.6/net/sunrpc/xprtsock.c
 @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
  {
   struct rpc_xprt *xprt = (struct rpc_xprt *) args;
   struct socket *sock = xprt-sock;
 + unsigned long pflags = current-flags;
   int err, status = -EIO;
  
   if (xprt-shutdown || xprt-addr.sin_port == 0)
 @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
  
   dprintk(RPC:  xs_udp_connect_worker for xprt %p\n, xprt);
  
 + if (xprt-swapper)
 + current-flags |= PF_MEMALLOC;
 +
   /* Start by resetting any existing state */
   xs_close(xprt);
  
 @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
   xprt-sock = sock;
   xprt-inet = sk;
  
 + if (xprt-swapper)
 + sk_set_vmio(sk);
 +
   write_unlock_bh(sk-sk_callback_lock);
   }
   xs_udp_do_set_buffer_size(xprt);
 @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
  out:
   xprt_wake_pending_tasks(xprt, status);
   xprt_clear_connecting(xprt);
 + current-flags = pflags;
  }
  
  /*
 @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
  {
   struct rpc_xprt *xprt = (struct rpc_xprt *)args;
   struct socket *sock = xprt-sock;
 + unsigned long pflags = current-flags;
   int err, status = -EIO;
  
   if (xprt-shutdown || xprt-addr.sin_port == 0)
   goto out;
  
 + if (xprt-swapper)
 + current-flags |= PF_MEMALLOC;
 +
   dprintk(RPC:  xs_tcp_connect_worker for xprt %p\n, xprt);
  
   if (!xprt-sock) {
 @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void *
   break;
   }
   }
 +
 + if (xprt-swapper)
 + sk_set_vmio(xprt-inet);
  out:
   xprt_wake_pending_tasks(xprt, status);
  out_clear:
   xprt_clear_connecting(xprt);
 + current-flags = pflags;
  }

How does this guarantee that the socket reconnection won't fail?

Also, what about the case of rpc_malloc()? Can't that cause rpciod to
deadlock when you add NFS swap into the equation?

Cheers,
  Trond

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Indan Zupancic
   /* Check if this were the first socks: */
   if (nr_socks - socks == 0)
   reserve += RX_RESERVE_PAGES;

Can of course be:

if (nr_socks == socks)
reserve += RX_RESERVE_PAGES;

Grumble,

Indan


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html