Re: [PATCH] NFS: Stop sillyname renames and unmounts from racing

2007-11-08 Thread Steve Dickson
Alexander Viro wrote:
> On Mon, Nov 05, 2007 at 09:06:36PM -0800, Andrew Morton wrote:
>>> Any objections to exporting the inode_lock spin lock?
>>> If so, how should modules _safely_ access the s_inode list?
> 
>> That's going to make hch unhappy.
> 
> That's going to make me just as unhappy, especially since it's pointless;
> instead of the entire sorry mess we should just bump sb->s_active to pin
> the superblock down (we know that it's active at that point, so it's just
> an atomic_inc(); no games with locking, etc., are needed) and call
> deactivate_super() on the way out.  And deactivate_super() is exported
> already.

Even though Al's suggestion did indeed fix the race and was incredibly 
simple (adding two lines of code), it turns out calling deactivate_super()
from the rpciod kernel daemon is not good, since it can cause deadlocks 
with other async tasks.

Following a suggestion from Trond, I decided to go back to the original idea 
of using put_super, but keeping all the changes inside the NFS client code.

So the following patch adds an active/deactive mechanism to the 
nfs_server structure (very similar to the s_active/deactivate_super()
mechanism) that any async op (in this case silly renames) can use
to stop unmount from occurring before they are finished. 

This means there is no need to expose any spin locks, no need for an
O(n^2) search and all the changes are contained inside the 
NFS client code. 

steved.

[Note: I did make the proposed corrections to my email client, so 
hopefully space-stuffing will be fixed. ]

Author: Steve Dickson <[EMAIL PROTECTED]>

Added an active/deactive mechanism to the nfs_server structure
allowing async operations to hold off umount until the
operations are done.

Signed-off-by: Steve Dickson <[EMAIL PROTECTED]>

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 70587f3..2ecf726 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -593,6 +593,10 @@ static int nfs_init_server(struct nfs_server *server,
server->namelen  = data->namlen;
/* Create a client RPC handle for the NFSv3 ACL management interface */
nfs_init_server_aclclient(server);
+
+   init_waitqueue_head(&server->active_wq);
+   atomic_set(&server->active, 0);
+
dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
return 0;
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa517ae..e07ac96 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -202,6 +202,7 @@ static int nfs_get_sb(struct file_system_type *, int, const 
char *, void *, stru
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct 
vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
+static void nfs_put_super(struct super_block *);
 
 static struct file_system_type nfs_fs_type = {
.owner  = THIS_MODULE,
@@ -223,6 +224,7 @@ static const struct super_operations nfs_sops = {
.alloc_inode= nfs_alloc_inode,
.destroy_inode  = nfs_destroy_inode,
.write_inode= nfs_write_inode,
+   .put_super  = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode= nfs_clear_inode,
.umount_begin   = nfs_umount_begin,
@@ -1767,6 +1769,17 @@ static void nfs4_kill_super(struct super_block *sb)
nfs_free_server(server);
 }
 
+static void nfs_put_super(struct super_block *sb)
+{
+   struct nfs_server *server = NFS_SB(sb);
+   /*
+* Make sure there are no outstanding ops to this server.
+* If so, wait for them to finish before allowing the
+* unmount to continue.
+*/
+   wait_event(server->active_wq, atomic_read(&server->active) == 0);
+}
+
 /*
  * Clone an NFS4 server record on xdev traversal (FSID-change)
  */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 233ad38..cf12a24 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -29,6 +29,7 @@ struct nfs_unlinkdata {
 static void
 nfs_free_unlinkdata(struct nfs_unlinkdata *data)
 {
+   nfs_sb_deactive(NFS_SERVER(data->dir));
iput(data->dir);
put_rpccred(data->cred);
kfree(data->args.name.name);
@@ -151,6 +152,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct 
inode *dir, struct n
nfs_dec_sillycount(dir);
return 0;
}
+   nfs_sb_active(NFS_SERVER(dir));
data->args.fh = NFS_FH(dir);
nfs_fattr_init(&data->res.dir_attr);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 0cac49b..6ef3af8 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -4,6 +4,8 @@
 #include 
 #include 
 
+#include 
+
 struct nfs_iostats;
 
 /*
@@ -110,8 +112,23 @@ struct nfs_server {
   filesystem */
 #endif
void (*destroy)(struct nfs_server *);
+
+   atomic_t active; /* Keep track of any activity to this server */
+   wait_queue_head

Re: [PATCH] NFS: Stop sillyname renames and unmounts from racing

2007-11-06 Thread Alexander Viro
On Tue, Nov 06, 2007 at 10:24:50AM +0200, Benny Halevy wrote:

> It'd be very nice if the silly renamed inodes (with silly_count > 1) were 
> moved
> to a different list in the first pass, under the inode_lock, and then waited 
> on
> until silly_count <= 1 in a second pass only on the filtered list.  This will
> provide you with O(1).

It's absolutely pointless, starting with any kind of searching for inodes,
etc.  If you want fs shutdown _not_ to happen until async activity of
that kind is over, don't reinvent the sodding wheels, just tell VFS that
you are holding an active reference to superblock.  End of story.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] NFS: Stop sillyname renames and unmounts from racing

2007-11-06 Thread Benny Halevy
On Nov. 06, 2007, 7:06 +0200, Andrew Morton <[EMAIL PROTECTED]> wrote:
> On Sat, 03 Nov 2007 07:09:25 -0400 Steve Dickson <[EMAIL PROTECTED]> wrote:
> 
>> The following patch stops NFS sillyname renames and umounts from racing.
> 
> (appropriate cc's added)
> 
>> I have a test script that does the following:
>>  1) start nfs server
>>   2) mount loopback
>>   3) open file in background
>>   4) remove file
>>   5) stop nfs server
>>   6) kill -9 process which has file open
>>   7) restart nfs server
>>   8) umount loopback mount.
>>
>> After umount I got the "VFS: Busy inodes after unmount" message
>> because the processing of the rename has not finished.
>>
>> Below is a patch that the uses the new silly_count mechanism to
>> synchronize sillyname processing and umounts. The patch introduces a
>> nfs_put_super() routine that waits until the nfsi->silly_count count
>> is zero.
>>
>> A side-effect of finding and waiting for all the inodes to
>> finish the sillyname processing is that I need to traverse
>> the sb->s_inodes list in the super block. To do that
>> safely the inode_lock spin lock has to be held. So for
>> modules to be able to "see" that lock I needed to
>> EXPORT_SYMBOL_GPL() it.
>>
>> Any objections to exporting the inode_lock spin lock?
>> If so, how should modules _safely_ access the s_inode list?
>>
>> steved.
>>
>>
>> Author: Steve Dickson <[EMAIL PROTECTED]>
>> Date:   Wed Oct 31 12:19:26 2007 -0400
>>
>>  Close an unlink/sillyname rename and umount race by adding a
>>  nfs_put_super routine that will run through all the inodes
>>  currently on the super block, waiting for those that are
>>  in the middle of a sillyname rename or removal.
>>
>>  This patch stops the infamous "VFS: Busy inodes after unmount... "
>>  warning during umounts.
>>
>>  Signed-off-by: Steve Dickson <[EMAIL PROTECTED]>
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index ed35383..da9034a 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -81,6 +81,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
>>* the i_state of an inode while it is in use..
>>*/
>>   DEFINE_SPINLOCK(inode_lock);
>> +EXPORT_SYMBOL_GPL(inode_lock);
> 
> That's going to make hch unhappy.
> 
> Your email client is performing space-stuffing.
> See http://mbligh.org/linuxdocs/Email/Clients/Thunderbird
> 
>>   static struct file_system_type nfs_fs_type = {
>>  .owner  = THIS_MODULE,
>> @@ -223,6 +225,7 @@ static const struct super_operations nfs_sops = {
>>  .alloc_inode= nfs_alloc_inode,
>>  .destroy_inode  = nfs_destroy_inode,
>>  .write_inode= nfs_write_inode,
>> +.put_super  = nfs_put_super,
>>  .statfs = nfs_statfs,
>>  .clear_inode= nfs_clear_inode,
>>  .umount_begin   = nfs_umount_begin,
>> @@ -1767,6 +1770,30 @@ static void nfs4_kill_super(struct super_block *sb)
>>  nfs_free_server(server);
>>   }
>>
>> +void nfs_put_super(struct super_block *sb)
> 
> This was (correctly) declared to be static.  We should define it that way
> too (I didn't know you could do this, actually).
> 
>> +{
>> +struct inode *inode;
>> +struct nfs_inode *nfsi;
>> +/*
>> + * Make sure there are no outstanding renames
>> + */
>> +relock:
>> +spin_lock(&inode_lock);
>> +list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
>> +nfsi = NFS_I(inode);
>> +if (atomic_read(&nfsi->silly_count) > 0) {
>> +/* Keep this inode around  during the wait */
>> +atomic_inc(&inode->i_count);
>> +spin_unlock(&inode_lock);
>> +wait_event(nfsi->waitqueue,
>> +atomic_read(&nfsi->silly_count) == 1);
>> +iput(inode);
>> +goto relock;
>> +}
>> +}
>> +spin_unlock(&inode_lock);
>> +}
> 
> That's an O(n^2) search.  If it is at all possible to hit a catastrophic
> slowdown in here, you can bet that someone out there will indeed hit it in
> real life.
> 
> I'm too lazy to look, but we might need to check things like I_FREEING
> and I_CLEAR before taking a ref on this inode.

It'd be very nice if the silly renamed inodes (with silly_count > 1) were moved
to a different list in the first pass, under the inode_lock, and then waited on
until silly_count <= 1 in a second pass only on the filtered list.  This will
provide you with O(1).

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] NFS: Stop sillyname renames and unmounts from racing

2007-11-05 Thread Alexander Viro
On Mon, Nov 05, 2007 at 09:06:36PM -0800, Andrew Morton wrote:
> > Any objections to exporting the inode_lock spin lock?
> > If so, how should modules _safely_ access the s_inode list?

> That's going to make hch unhappy.

That's going to make me just as unhappy, especially since it's pointless;
instead of the entire sorry mess we should just bump sb->s_active to pin
the superblock down (we know that it's active at that point, so it's just
an atomic_inc(); no games with locking, etc., are needed) and call
deactivate_super() on the way out.  And deactivate_super() is exported
already.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] NFS: Stop sillyname renames and unmounts from racing

2007-11-05 Thread Andrew Morton
On Sat, 03 Nov 2007 07:09:25 -0400 Steve Dickson <[EMAIL PROTECTED]> wrote:

> The following patch stops NFS sillyname renames and umounts from racing.

(appropriate cc's added)

> I have a test script that does the following:
>  1) start nfs server
>   2) mount loopback
>   3) open file in background
>   4) remove file
>   5) stop nfs server
>   6) kill -9 process which has file open
>   7) restart nfs server
>   8) umount loopback mount.
> 
> After umount I got the "VFS: Busy inodes after unmount" message
> because the processing of the rename has not finished.
> 
> Below is a patch that uses the new silly_count mechanism to
> synchronize sillyname processing and umounts. The patch introduces a
> nfs_put_super() routine that waits until the nfsi->silly_count count
> is zero.
> 
> A side-effect of finding and waiting for all the inodes to
> finish the sillyname processing is that I need to traverse
> the sb->s_inodes list in the super block. To do that
> safely the inode_lock spin lock has to be held. So for
> modules to be able to "see" that lock I needed to
> EXPORT_SYMBOL_GPL() it.
> 
> Any objections to exporting the inode_lock spin lock?
> If so, how should modules _safely_ access the s_inode list?
> 
> steved.
> 
> 
> Author: Steve Dickson <[EMAIL PROTECTED]>
> Date:   Wed Oct 31 12:19:26 2007 -0400
> 
>  Close an unlink/sillyname rename and umount race by adding a
>  nfs_put_super routine that will run through all the inodes
>  currently on the super block, waiting for those that are
>  in the middle of a sillyname rename or removal.
> 
>  This patch stops the infamous "VFS: Busy inodes after unmount... "
>  warning during umounts.
> 
>  Signed-off-by: Steve Dickson <[EMAIL PROTECTED]>
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index ed35383..da9034a 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -81,6 +81,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
>* the i_state of an inode while it is in use..
>*/
>   DEFINE_SPINLOCK(inode_lock);
> +EXPORT_SYMBOL_GPL(inode_lock);

That's going to make hch unhappy.

Your email client is performing space-stuffing.
See http://mbligh.org/linuxdocs/Email/Clients/Thunderbird

>   static struct file_system_type nfs_fs_type = {
>   .owner  = THIS_MODULE,
> @@ -223,6 +225,7 @@ static const struct super_operations nfs_sops = {
>   .alloc_inode= nfs_alloc_inode,
>   .destroy_inode  = nfs_destroy_inode,
>   .write_inode= nfs_write_inode,
> + .put_super  = nfs_put_super,
>   .statfs = nfs_statfs,
>   .clear_inode= nfs_clear_inode,
>   .umount_begin   = nfs_umount_begin,
> @@ -1767,6 +1770,30 @@ static void nfs4_kill_super(struct super_block *sb)
>   nfs_free_server(server);
>   }
> 
> +void nfs_put_super(struct super_block *sb)

This was (correctly) declared to be static.  We should define it that way
too (I didn't know you could do this, actually).

> +{
> + struct inode *inode;
> + struct nfs_inode *nfsi;
> + /*
> +  * Make sure there are no outstanding renames
> +  */
> +relock:
> + spin_lock(&inode_lock);
> + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
> + nfsi = NFS_I(inode);
> + if (atomic_read(&nfsi->silly_count) > 0) {
> + /* Keep this inode around  during the wait */
> + atomic_inc(&inode->i_count);
> + spin_unlock(&inode_lock);
> + wait_event(nfsi->waitqueue,
> + atomic_read(&nfsi->silly_count) == 1);
> + iput(inode);
> + goto relock;
> + }
> + }
> + spin_unlock(&inode_lock);
> +}

That's an O(n^2) search.  If it is at all possible to hit a catastrophic
slowdown in here, you can bet that someone out there will indeed hit it in
real life.

I'm too lazy to look, but we might need to check things like I_FREEING
and I_CLEAR before taking a ref on this inode.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] NFS: Stop sillyname renames and unmounts from racing

2007-11-03 Thread Steve Dickson

The following patch stops NFS sillyname renames and umounts from racing.
I have a test script that does the following:
1) start nfs server
 2) mount loopback
 3) open file in background
 4) remove file
 5) stop nfs server
 6) kill -9 process which has file open
 7) restart nfs server
 8) umount loopback mount.

After umount I got the "VFS: Busy inodes after unmount" message
because the processing of the rename has not finished.

Below is a patch that uses the new silly_count mechanism to
synchronize sillyname processing and umounts. The patch introduces a
nfs_put_super() routine that waits until the nfsi->silly_count count
is zero.

A side-effect of finding and waiting for all the inodes to
finish the sillyname processing is that I need to traverse
the sb->s_inodes list in the super block. To do that
safely the inode_lock spin lock has to be held. So for
modules to be able to "see" that lock I needed to
EXPORT_SYMBOL_GPL() it.

Any objections to exporting the inode_lock spin lock?
If so, how should modules _safely_ access the s_inode list?

steved.


Author: Steve Dickson <[EMAIL PROTECTED]>
Date:   Wed Oct 31 12:19:26 2007 -0400

Close an unlink/sillyname rename and umount race by adding a
nfs_put_super routine that will run through all the inodes
currently on the super block, waiting for those that are
in the middle of a sillyname rename or removal.

This patch stops the infamous "VFS: Busy inodes after unmount... "
warning during umounts.

Signed-off-by: Steve Dickson <[EMAIL PROTECTED]>

diff --git a/fs/inode.c b/fs/inode.c
index ed35383..da9034a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -81,6 +81,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
  * the i_state of an inode while it is in use..
  */
 DEFINE_SPINLOCK(inode_lock);
+EXPORT_SYMBOL_GPL(inode_lock);

 /*
  * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa517ae..2ac3c34 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 

 #include 
 #include 
@@ -202,6 +203,7 @@ static int nfs_get_sb(struct file_system_type *,
int, const char *, void *, stru
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct 
vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
+static void nfs_put_super(struct super_block *);

 static struct file_system_type nfs_fs_type = {
.owner  = THIS_MODULE,
@@ -223,6 +225,7 @@ static const struct super_operations nfs_sops = {
.alloc_inode= nfs_alloc_inode,
.destroy_inode  = nfs_destroy_inode,
.write_inode= nfs_write_inode,
+   .put_super  = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode= nfs_clear_inode,
.umount_begin   = nfs_umount_begin,
@@ -1767,6 +1770,30 @@ static void nfs4_kill_super(struct super_block *sb)
nfs_free_server(server);
 }

+void nfs_put_super(struct super_block *sb)
+{
+   struct inode *inode;
+   struct nfs_inode *nfsi;
+   /*
+* Make sure there are no outstanding renames
+*/
+relock:
+   spin_lock(&inode_lock);
+   list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+   nfsi = NFS_I(inode);
+   if (atomic_read(&nfsi->silly_count) > 0) {
+   /* Keep this inode around  during the wait */
+   atomic_inc(&inode->i_count);
+   spin_unlock(&inode_lock);
+   wait_event(nfsi->waitqueue,
+   atomic_read(&nfsi->silly_count) == 1);
+   iput(inode);
+   goto relock;
+   }
+   }
+   spin_unlock(&inode_lock);
+}
+
 /*
  * Clone an NFS4 server record on xdev traversal (FSID-change)
  */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/