This patch implements basic DAX support. mmap() is not implemented
yet and will come in later patches. This patch looks into implemeting
read/write.
We make use of interval tree to keep track of per inode dax mappings.
Do not use dax for file extending writes, instead just send WRITE message
to daemon (like we do for direct I/O path). This will keep write and
i_size change atomic w.r.t crash.
Signed-off-by: Stefan Hajnoczi
Signed-off-by: Dr. David Alan Gilbert
Signed-off-by: Vivek Goyal
Signed-off-by: Miklos Szeredi
Signed-off-by: Liu Bo
Signed-off-by: Peng Tao
---
fs/fuse/file.c| 550 +-
fs/fuse/fuse_i.h | 26 ++
fs/fuse/inode.c | 6 +
include/uapi/linux/fuse.h | 1 +
4 files changed, 577 insertions(+), 6 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 83d917f7e542..194fe3e404a7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -19,6 +19,9 @@
#include
#include
#include
+#include
+#include
+#include
static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
@@ -188,6 +191,228 @@ static void fuse_link_write_file(struct file *file)
spin_unlock(>lock);
}
+static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn *fc)
+{
+ struct fuse_dax_mapping *dmap = NULL;
+
+ spin_lock(>lock);
+
+ if (fc->nr_free_ranges <= 0) {
+ spin_unlock(>lock);
+ return NULL;
+ }
+
+ WARN_ON(list_empty(>free_ranges));
+
+ /* Take a free range */
+ dmap = list_first_entry(>free_ranges, struct fuse_dax_mapping,
+ list);
+ list_del_init(>list);
+ fc->nr_free_ranges--;
+ spin_unlock(>lock);
+ return dmap;
+}
+
+/* This assumes fc->lock is held */
+static void __dmap_add_to_free_pool(struct fuse_conn *fc,
+ struct fuse_dax_mapping *dmap)
+{
+ list_add_tail(>list, >free_ranges);
+ fc->nr_free_ranges++;
+}
+
+static void dmap_add_to_free_pool(struct fuse_conn *fc,
+ struct fuse_dax_mapping *dmap)
+{
+ /* Return fuse_dax_mapping to free list */
+ spin_lock(>lock);
+ __dmap_add_to_free_pool(fc, dmap);
+ spin_unlock(>lock);
+}
+
+static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
+ struct fuse_dax_mapping *dmap, bool writable,
+ bool upgrade)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_setupmapping_in inarg;
+ loff_t offset = start_idx << FUSE_DAX_SHIFT;
+ FUSE_ARGS(args);
+ ssize_t err;
+
+ WARN_ON(fc->nr_free_ranges < 0);
+
+ /* Ask fuse daemon to setup mapping */
+ memset(, 0, sizeof(inarg));
+ inarg.foffset = offset;
+ inarg.fh = -1;
+ inarg.moffset = dmap->window_offset;
+ inarg.len = FUSE_DAX_SZ;
+ inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
+ if (writable)
+ inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
+ args.opcode = FUSE_SETUPMAPPING;
+ args.nodeid = fi->nodeid;
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value =
+ err = fuse_simple_request(fc, );
+ if (err < 0)
+ return err;
+ dmap->writable = writable;
+ if (!upgrade) {
+ dmap->itn.start = dmap->itn.last = start_idx;
+ /* Protected by fi->i_dmap_sem */
+ interval_tree_insert(>itn, >dmap_tree);
+ fi->nr_dmaps++;
+ }
+ return 0;
+}
+
+static int
+fuse_send_removemapping(struct inode *inode,
+ struct fuse_removemapping_in *inargp,
+ struct fuse_removemapping_one *remove_one)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+
+ args.opcode = FUSE_REMOVEMAPPING;
+ args.nodeid = fi->nodeid;
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(*inargp);
+ args.in_args[0].value = inargp;
+ args.in_args[1].size = inargp->count * sizeof(*remove_one);
+ args.in_args[1].value = remove_one;
+ return fuse_simple_request(fc, );
+}
+
+static int dmap_removemapping_list(struct inode *inode, unsigned num,
+ struct list_head *to_remove)
+{
+ struct fuse_removemapping_one *remove_one, *ptr;
+ struct fuse_removemapping_in inarg;
+ struct fuse_dax_mapping *dmap;
+ int ret, i = 0, nr_alloc;
+
+ nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
+ remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
+ if (!remove_one)
+ return -ENOMEM;
+
+