On Mon, Jan 4, 2021 at 7:57 PM Dr. David Alan Gilbert
<[email protected]> wrote:
>
> * Vivek Goyal ([email protected]) wrote:
> > On Mon, Jan 04, 2021 at 04:00:13PM +0000, Dr. David Alan Gilbert wrote:
> > > Hi,
> > > On virtio-fs we're hitting a problem with NFS, where
> > > unlinking a file in a directory and then rmdir'ing that
> > > directory fails complaining about the directory not being empty.
> > >
> > > The problem here is that if a file has an open fd, NFS doesn't
> > > actually delete the file on unlink, it just renames it to
> > > a hidden file (e.g. .nfs*******). That open file is there because
> > > the 'FORGET' hasn't completed yet by the time the rmdir is issued.
> > >
> > > Question:
> > > a) In the FUSE protocol, are requests assumed to complete in order;
> > > i.e. unlink, forget, rmdir is it required that 'forget' completes
> > > before the rmdir is processed?
> > > (In virtiofs we've been processing requests, in parallel, and
> > > have sent forgets down a separate queue to keep them out of the way).
> > >
> > > b) 'forget' doesn't send a reply - so the kernel can't wait for the
> > > client to have finished it; do we need a synchronous forget here?
> >
> > Even if we introduce a synchronous forget, will that really fix the
> > issue. For example, this could also happen if file has been unlinked
> > but it is still open and directory is being removed.
> >
> > fd = open(foo/bar.txt)
> > unlink foo/bar.txt
> > rmdir foo
> > close(fd).
> >
> > In this case, final forget should go after fd has been closed. Its
> > not a forget race.
> >
> > I wrote a test case for this and it works on regular file systems.
> >
> > https://github.com/rhvgoyal/misc/blob/master/virtiofs-tests/rmdir.c
> >
> > I suspect it will fail on nfs because I am assuming that temporary
> > file will be there till final close(fd) happens. If that's the
> > case this is a NFS specific issue because its behavior is different
> > from other file systems.
>
> That's true; but that's NFS just being NFS; in our case we're keeping
> an fd open even though the guest has been smart enough not to; so we're
> causing the NFS oddity when it wouldn't normally happen.
Can't think of anything better than a synchronous forget. Compile
only tested patch attached.
Thanks,
Miklos
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 78f9f209078c..daa4e669441d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -373,6 +373,26 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
return ERR_PTR(err);
}
+static void fuse_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+ if (!__lockref_is_dead(&dentry->d_lockref)) {
+ /*
+ * This is an unlink/rmdir and removing the last ref to the
+ * dentry. Use synchronous FORGET in case filesystem requests
+ * it.
+ *
+ * FIXME: This is racy! Two or more instances of
+ * fuse_dentry_iput() could be running concurrently (unlink of
+ * several aliases in different directories).
+ */
+ set_bit(FUSE_I_SYNC_FORGET, &get_fuse_inode(inode)->state);
+ iput(inode);
+ clear_bit(FUSE_I_SYNC_FORGET, &get_fuse_inode(inode)->state);
+ } else {
+ iput(inode);
+ }
+}
+
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
.d_delete = fuse_dentry_delete,
@@ -381,6 +401,7 @@ const struct dentry_operations fuse_dentry_operations = {
.d_release = fuse_dentry_release,
#endif
.d_automount = fuse_dentry_automount,
+ .d_iput = fuse_dentry_iput,
};
const struct dentry_operations fuse_root_dentry_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7c4b8cb93f9f..0820b7a63ca7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -174,6 +174,8 @@ enum {
FUSE_I_SIZE_UNSTABLE,
/* Bad inode */
FUSE_I_BAD,
+ /* Synchronous forget requested */
+ FUSE_I_SYNC_FORGET,
};
struct fuse_conn;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b0e18b470e91..a49ff30d1ecc 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -115,6 +115,26 @@ static void fuse_free_inode(struct inode *inode)
kmem_cache_free(fuse_inode_cachep, fi);
}
+static void fuse_sync_forget(struct inode *inode)
+{
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_forget_in inarg;
+ FUSE_ARGS(args);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.nlookup = fi->nlookup;
+ args.opcode = FUSE_SYNC_FORGET;
+ args.nodeid = fi->nodeid;
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.force = true;
+
+ fuse_simple_request(fm, &args);
+ /* ignore errors */
+}
+
static void fuse_evict_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -127,9 +147,13 @@ static void fuse_evict_inode(struct inode *inode)
if (FUSE_IS_DAX(inode))
fuse_dax_inode_cleanup(inode);
if (fi->nlookup) {
- fuse_queue_forget(fc, fi->forget, fi->nodeid,
- fi->nlookup);
- fi->forget = NULL;
+ if (test_bit(FUSE_I_SYNC_FORGET, &fi->state)) {
+ fuse_sync_forget(inode);
+ } else {
+ fuse_queue_forget(fc, fi->forget, fi->nodeid,
+ fi->nlookup);
+ fi->forget = NULL;
+ }
}
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 98ca64d1beb6..cfcf95cfde76 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -499,6 +499,7 @@ enum fuse_opcode {
FUSE_COPY_FILE_RANGE = 47,
FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49,
+ FUSE_SYNC_FORGET = 50,
/* CUSE specific operations */
CUSE_INIT = 4096,
_______________________________________________
Virtio-fs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/virtio-fs