Hi, Miklos:

This patch adds an async option for the flush/release operations in FUSE.

The async flush/release option allows a FUSE-based application to be terminated
without being blocked in the flush/release operation, even in the presence of
complex external interactions. In addition, the async operation can be more
efficient when a large number of FUSE-based files are involved.

---
Deadlock Example:

    Process A is a multi-threaded application that interacts with Process B,
    a FUSE-server.


               UNIX-domain socket
    App (A)  -----------------------  FUSE-server (B)
       |                                   |
       |                                   |
       |                                   |
       +-----------------------------------+
               open/flush/release


    When the FUSE-server receives an open or a flush/release operation from
    Process A, it in turn interacts with Process A (e.g., coordinating
    shared memory allocation and de-allocation) using the connection-oriented
    UNIX-domain socket.

    A deadlock occurs when Process A is terminating:

      1) As part of Process A's termination (i.e., do_exit() in the kernel),
         the kernel sends "flush/release" to Process B on behalf of Process A
         and waits for the reply, due to the synchronous nature of the
         operation.

      2) When Process B receives the "flush/release" request, it would in turn
         send a message to Process A (over the UNIX-domain channel) and wait
         for its reply.

      3) As Process A is terminating, it may not be able to reply to Process B,
         resulting in a deadlock.

   The async flush/release option offers a simple and robust solution to the
   deadlock issue.

   With the async flush/release operation, all the files and sockets in
   Process A can be closed without being blocked. Closing the UNIX-domain
   socket in turn unblocks Process B, which is waiting for a reply on it.
---

Signed-off-by: Enke Chen <enkec...@cisco.com>

Version: 4.7.0_next_20160805

 fs/fuse/file.c            |   39 +++++++++++++++++++++++++++------------
 fs/fuse/fuse_i.h          |    4 ++++
 fs/fuse/inode.c           |    4 +++-
 include/uapi/linux/fuse.h |    7 ++++++-
 4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f394aff..7dd144f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -273,7 +273,8 @@ void fuse_release_common(struct file *file, int opcode)
         * synchronous RELEASE is allowed (and desirable) in this case
         * because the server can be trusted not to screw up.
         */
-       fuse_file_put(ff, ff->fc->destroy_req != NULL);
+       fuse_file_put(ff, (ff->fc->destroy_req != NULL) &&
+                     !ff->fc->async_flush);
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
@@ -394,13 +395,19 @@ static void fuse_sync_writes(struct inode *inode)
        fuse_release_nowrite(inode);
 }
 
+static void fuse_flush_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+       if (req->out.h.error == -ENOSYS)
+               fc->no_flush = 1;
+}
+
 static int fuse_flush(struct file *file, fl_owner_t id)
 {
        struct inode *inode = file_inode(file);
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_file *ff = file->private_data;
        struct fuse_req *req;
-       struct fuse_flush_in inarg;
+       struct fuse_flush_in *inarg;
        int err;
 
        if (is_bad_inode(inode))
@@ -423,20 +430,28 @@ static int fuse_flush(struct file *file, fl_owner_t id)
 
        req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
-       inarg.fh = ff->fh;
-       inarg.lock_owner = fuse_lock_owner_id(fc, id);
+       inarg = &req->misc.flush_in;
+       inarg->fh = ff->fh;
+       inarg->lock_owner = fuse_lock_owner_id(fc, id);
        req->in.h.opcode = FUSE_FLUSH;
        req->in.h.nodeid = get_node_id(inode);
        req->in.numargs = 1;
-       req->in.args[0].size = sizeof(inarg);
-       req->in.args[0].value = &inarg;
-       __set_bit(FR_FORCE, &req->flags);
-       fuse_request_send(fc, req);
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
-       if (err == -ENOSYS) {
-               fc->no_flush = 1;
+       req->in.args[0].size = sizeof(struct fuse_flush_in);
+       req->in.args[0].value = inarg;
+       if (fc->async_flush) {
+               req->end = fuse_flush_end;
+               __set_bit(FR_BACKGROUND, &req->flags);
+               fuse_request_send_background(fc, req);
                err = 0;
+       } else {
+               __set_bit(FR_FORCE, &req->flags);
+               fuse_request_send(fc, req);
+               err = req->out.h.error;
+               fuse_put_request(fc, req);
+               if (err == -ENOSYS) {
+                       fc->no_flush = 1;
+                       err = 0;
+               }
        }
        return err;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d98d8cc..f212cdd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -350,6 +350,7 @@ struct fuse_req {
                        struct fuse_req *next;
                } write;
                struct fuse_notify_retrieve_in retrieve_in;
+               struct fuse_flush_in flush_in;
        } misc;
 
        /** page vector */
@@ -624,6 +625,9 @@ struct fuse_conn {
        /** Is lseek not implemented by fs? */
        unsigned no_lseek:1;
 
+       /** Does the filesystem want async flush? */
+       unsigned async_flush:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4e05b51..2d031b1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -910,6 +910,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->writeback_cache = 1;
                        if (arg->flags & FUSE_PARALLEL_DIROPS)
                                fc->parallel_dirops = 1;
+                       if (arg->flags & FUSE_ASYNC_FLUSH)
+                               fc->async_flush = 1;
                        if (arg->time_gran && arg->time_gran <= 1000000000)
                                fc->sb->s_time_gran = arg->time_gran;
                } else {
@@ -941,7 +943,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
                FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
                FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
                FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
-               FUSE_PARALLEL_DIROPS;
+               FUSE_PARALLEL_DIROPS | FUSE_ASYNC_FLUSH;
        req->in.h.opcode = FUSE_INIT;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 27e1736..76087d3 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -108,6 +108,9 @@
  *
  *  7.25
  *  - add FUSE_PARALLEL_DIROPS
+ *
+ *  7.26
+ *  - add FUSE_ASYNC_FLUSH
  */
 
 #ifndef _LINUX_FUSE_H
@@ -143,7 +146,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 25
+#define FUSE_KERNEL_MINOR_VERSION 26
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -238,6 +241,7 @@ struct fuse_file_lock {
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
  * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_ASYNC_FLUSH: asynchronous flush and release
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -258,6 +262,7 @@ struct fuse_file_lock {
 #define FUSE_WRITEBACK_CACHE   (1 << 16)
 #define FUSE_NO_OPEN_SUPPORT   (1 << 17)
 #define FUSE_PARALLEL_DIROPS    (1 << 18)
+#define FUSE_ASYNC_FLUSH       (1 << 19)
 
 /**
  * CUSE INIT request/reply flags

Reply via email to