Add a v9fs private ->writepages() method of address_space
operations for merging pages into long 9p messages.

Signed-off-by: Edward Shishkin <edward.shish...@gmail.com>
---
 fs/9p/v9fs.c      |  46 +++++++
 fs/9p/v9fs.h      |  22 +++-
 fs/9p/vfs_addr.c  | 357 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/vfs_super.c |   8 +-
 4 files changed, 431 insertions(+), 2 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 072e759..3b49daf 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -32,6 +32,7 @@
 #include <linux/parser.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
@@ -309,6 +310,49 @@ static int v9fs_parse_options(struct v9fs_session_info 
*v9ses, char *opts)
        return ret;
 }
 
+/* Free a flush set allocated by alloc_init_flush_set(), if any. */
+void put_flush_set(struct v9fs_flush_set *fset)
+{
+       if (!fset)
+               return;
+       /* kfree(NULL) is a no-op, so members may be unallocated */
+       kfree(fset->pages);
+       kfree(fset->buf);
+       kfree(fset);
+}
+
+/**
+ * Allocate and initialize the flush set used by ->writepages()
+ * to coalesce dirty pages.
+ * Pre-conditions: valid msize is set
+ *
+ * Returns 0 on success -- including the case where msize covers
+ * fewer than 2 pages and no flush set is created -- or -ENOMEM.
+ */
+int alloc_init_flush_set(struct v9fs_session_info *v9ses)
+{
+       int ret = -ENOMEM;
+       int num_pages;
+       struct v9fs_flush_set *fset = NULL;
+
+       num_pages = v9ses->clnt->msize >> PAGE_SHIFT;
+       if (num_pages < 2)
+               /* speedup impossible */
+               return 0;
+       fset = kzalloc(sizeof(*fset), GFP_KERNEL);
+       if (!fset)
+               goto error;
+       fset->num_pages = num_pages;
+       /* kcalloc checks num_pages * size for multiplication overflow */
+       fset->pages = kcalloc(num_pages, sizeof(*fset->pages), GFP_KERNEL);
+       if (!fset->pages)
+               goto error;
+       fset->buf = kzalloc(num_pages << PAGE_SHIFT, GFP_USER);
+       if (!fset->buf)
+               goto error;
+       spin_lock_init(&(fset->lock));
+       v9ses->flush = fset;
+       return 0;
+ error:
+       put_flush_set(fset);
+       return ret;
+}
+
 /**
  * v9fs_session_init - initialize session
  * @v9ses: session information structure
@@ -444,6 +488,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
        kfree(v9ses->uname);
        kfree(v9ses->aname);
 
+       put_flush_set(v9ses->flush);
+
        bdi_destroy(&v9ses->bdi);
 
        spin_lock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 6877050..d1092e4 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -23,6 +23,7 @@
 #ifndef FS_9P_V9FS_H
 #define FS_9P_V9FS_H
 
+#include <linux/kconfig.h>
 #include <linux/backing-dev.h>
 
 /**
@@ -69,6 +70,13 @@ enum p9_cache_modes {
        CACHE_FSCACHE,
 };
 
+/*
+ * Scratch area (one per session) used by ->writepages() to
+ * coalesce adjacent dirty pages into a single 9p message.
+ */
+struct v9fs_flush_set {
+       struct page **pages;    /* array of num_pages page pointers */
+       int num_pages;          /* capacity: client msize >> PAGE_SHIFT */
+       char *buf;              /* bounce buffer of num_pages pages */
+       spinlock_t lock;        /* grants exclusive use of the set */
+};
+
 /**
  * struct v9fs_session_info - per-instance session information
  * @flags: session options of type &p9_session_flags
@@ -105,7 +113,7 @@ struct v9fs_session_info {
        char *cachetag;
        struct fscache_cookie *fscache;
 #endif
-
+       struct v9fs_flush_set *flush; /* flush set for writepages */
        char *uname;            /* user name to mount as */
        char *aname;            /* name of remote hierarchy being mounted */
        unsigned int maxdata;   /* max data for client interface */
@@ -158,6 +166,8 @@ extern const struct inode_operations 
v9fs_symlink_inode_operations_dotl;
 extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
                                              struct p9_fid *fid,
                                              struct super_block *sb, int new);
+extern int alloc_init_flush_set(struct v9fs_session_info *v9ses);
+extern void put_flush_set(struct v9fs_flush_set *fset);
 
 /* other default globals */
 #define V9FS_PORT      564
@@ -222,4 +232,14 @@ v9fs_get_new_inode_from_fid(struct v9fs_session_info 
*v9ses, struct p9_fid *fid,
                return v9fs_inode_from_fid(v9ses, fid, sb, 1);
 }
 
+/* Try to grab the flush set for exclusive use; nonzero on success. */
+static inline int spin_trylock_flush_set(struct v9fs_flush_set *fset)
+{
+       return spin_trylock(&fset->lock);
+}
+
+/* Release a flush set taken with spin_trylock_flush_set(). */
+static inline void spin_unlock_flush_set(struct v9fs_flush_set *fset)
+{
+       spin_unlock(&fset->lock);
+}
+
 #endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6181ad7..e871886 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -36,6 +36,7 @@
 #include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
+#include <trace/events/writeback.h>
 
 #include "v9fs.h"
 #include "v9fs_vfs.h"
@@ -209,6 +210,361 @@ static int v9fs_vfs_writepage(struct page *page, struct 
writeback_control *wbc)
        return retval;
 }
 
+/*
+ * Mark each of @nr pages dirty again so a later writeback
+ * pass will pick them up.
+ */
+static void redirty_pages_for_writeback(struct page **pages, int nr,
+                                       struct writeback_control *wbc)
+{
+       int idx;
+
+       for (idx = 0; idx < nr; idx++) {
+               struct page *page = pages[idx];
+
+               lock_page(page);
+               redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
+       }
+}
+
+/*
+ * Flag each of @nr pages with PG_error and record @error on
+ * the owning mapping.
+ */
+static void set_pages_error(struct page **pages, int nr, int error)
+{
+       int idx;
+
+       for (idx = 0; idx < nr; idx++) {
+               struct page *page = pages[idx];
+
+               lock_page(page);
+               SetPageError(page);
+               mapping_set_error(page->mapping, error);
+               unlock_page(page);
+       }
+}
+
+#define V9FS_WRITEPAGES_DEBUG   (0)
+
+/*
+ * State shared by the writepages fast-path helpers for the
+ * duration of one v9fs_writepages() call.
+ */
+struct flush_context {
+       struct writeback_control *wbc;
+       struct address_space *mapping;
+       struct v9fs_flush_set *fset;    /* per-session coalescing buffers */
+       pgoff_t done_index;             /* index to resume from next time */
+       pgoff_t writeback_index;        /* saved mapping->writeback_index */
+       pgoff_t index;                  /* current scan position */
+       pgoff_t end; /* Inclusive */
+       const char *msg;                /* why the last page was skipped */
+       int cycled;                     /* range_cyclic: wrapped to 0 yet? */
+       int range_whole;                /* request covers the whole file */
+       int done;                       /* stop scanning */
+};
+
+/**
+ * Copy a page with file's data to the flush set's buffer.
+ * Handle races with truncate, etc.
+ * Return number of copied bytes (0 if the page was skipped;
+ * the reason is then recorded in ctx->msg)
+ *
+ * @page: page to copy data from;
+ * @page_nr: serial number of the page (its slot in the buffer);
+ * @ctx: flush context of the current writepages call
+ */
+static int flush_page(struct page *page, int page_nr, struct flush_context 
*ctx)
+{
+       char *kdata;
+       loff_t isize;
+       int copied = 0;
+       struct writeback_control *wbc = ctx->wbc;
+       /*
+        * At this point, the page may be truncated or
+        * invalidated (changing page->mapping to NULL), or
+        * even swizzled back from swapper_space to tmpfs file
+        * mapping. However, page->index will not change
+        * because we have a reference on the page.
+        */
+       if (page->index > ctx->end) {
+               /*
+                * can't be range_cyclic (1st pass) because
+                * end == -1 in that case.
+                */
+               ctx->done = 1;
+               ctx->msg = "page out of range";
+               goto exit;
+       }
+       ctx->done_index = page->index;
+       lock_page(page);
+       /*
+        * Page truncated or invalidated. We can freely skip it
+        * then, even for data integrity operations: the page
+        * has disappeared concurrently, so there could be no
+        * real expectation of this data integrity operation
+        * even if there is now a new, dirty page at the same
+        * pagecache address.
+        */
+       if (unlikely(page->mapping != ctx->mapping)) {
+               unlock_page(page);
+               ctx->msg = "page truncated or invalidated";
+               goto exit;
+       }
+       if (!PageDirty(page)) {
+               /*
+                * someone wrote it for us
+                */
+               unlock_page(page);
+               ctx->msg = "page not dirty";
+               goto exit;
+       }
+       if (PageWriteback(page)) {
+               if (wbc->sync_mode != WB_SYNC_NONE)
+                       wait_on_page_writeback(page);
+               else {
+                       unlock_page(page);
+                       ctx->msg = "page is writeback";
+                       goto exit;
+               }
+       }
+       BUG_ON(PageWriteback(page));
+       if (!clear_page_dirty_for_io(page)) {
+               unlock_page(page);
+               ctx->msg = "failed to clear page dirty";
+               goto exit;
+       }
+       trace_wbc_writepage(wbc, inode_to_bdi(ctx->mapping->host));
+
+       /*
+        * NOTE(review): writeback is ended right after the copy into
+        * the bounce buffer below, before send_buffer() has actually
+        * transmitted the data -- confirm that PageWriteback waiters
+        * need not wait for the 9p reply.
+        */
+       set_page_writeback(page);
+       isize = i_size_read(ctx->mapping->host);
+       if (page->index == isize >> PAGE_SHIFT)
+               /*
+                * NOTE(review): partial tail page; if isize is exactly
+                * page-aligned this yields copied == 0 -- verify such a
+                * page cannot reach this point dirty.
+                */
+               copied = isize & ~PAGE_MASK;
+       else
+               copied = PAGE_SIZE;
+       kdata = kmap_atomic(page);
+       memcpy(ctx->fset->buf + (page_nr << PAGE_SHIFT), kdata, copied);
+       kunmap_atomic(kdata);
+       end_page_writeback(page);
+
+       unlock_page(page);
+       /*
+        * We stop writing back only if we are not doing
+        * integrity sync. In case of integrity sync we have to
+        * keep going until we have written all the pages
+        * we tagged for writeback prior to entering this loop.
+        */
+       if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
+               ctx->done = 1;
+ exit:
+       return copied;
+}
+
+/**
+ * Write @len bytes of the flush set's bounce buffer at file
+ * offset @offset through the inode's writeback fid.
+ *
+ * Returns 0 on success, otherwise whatever error
+ * p9_client_write() stored through its last (&ret) argument.
+ */
+static int send_buffer(off_t offset, int len, struct flush_context *ctx)
+{
+       int ret = 0;
+       struct kvec kvec;
+       struct iov_iter iter;
+       struct v9fs_inode *v9inode = V9FS_I(ctx->mapping->host);
+
+       kvec.iov_base = ctx->fset->buf;
+       kvec.iov_len = len;
+       iov_iter_kvec(&iter, WRITE | ITER_KVEC, &kvec, 1, len);
+       BUG_ON(!v9inode->writeback_fid);
+
+       p9_client_write(v9inode->writeback_fid, offset, &iter, &ret);
+       return ret;
+}
+
+/**
+ * Helper function for managing 9pFS write requests.
+ * The main purpose of this function is to provide support for
+ * the coalescing of several pages into a single 9p message.
+ * This is similar to NFS's pagelist.
+ *
+ * Copy pages with adjacent indices to a buffer and send it to
+ * the server.
+ *
+ * @pages: array of pages with ascending indices;
+ * @nr_pages: number of pages in the array;
+ * @ctx: flush context of the current writepages call
+ *
+ * Returns 0, or the negative error that stopped the flushing.
+ */
+static int flush_pages(struct page **pages, int nr_pages,
+                      struct flush_context *ctx)
+{
+       /*
+        * ret must start at 0: the first run may be skipped entirely
+        * (iter_len == 0) while flush_page() sets ctx->done, in which
+        * case ret is returned without a send_buffer() call.
+        */
+       int ret = 0;
+       int pos = 0;
+       int iter_pos;
+       int iter_nrpages;
+       pgoff_t iter_page_idx;
+
+       while (pos < nr_pages) {
+
+               int i;
+               int iter_len = 0;
+               struct page *page;
+
+               /* start a new run of pages with contiguous indices */
+               iter_pos = pos;
+               iter_nrpages = 0;
+               iter_page_idx = pages[pos]->index;
+
+               for (i = 0; pos < nr_pages; i++) {
+                       int from_page;
+
+                       page = pages[pos];
+                       if (page->index != iter_page_idx + i) {
+                               /*
+                                * Hole in the indices,
+                                * further coalesce impossible.
+                                * Try to send what we have accumulated.
+                                * This page will be processed in the next
+                                * iteration
+                                */
+                               goto iter_send;
+                       }
+                       from_page = flush_page(page, i, ctx);
+
+                       iter_len += from_page;
+                       iter_nrpages++;
+                       pos++;
+
+                       if (from_page != PAGE_SIZE) {
+                               /*
+                                * Not full page was flushed,
+                                * further coalesce impossible.
+                                * Try to send what we have accumulated.
+                                */
+#if V9FS_WRITEPAGES_DEBUG
+                               if (from_page == 0)
+                                   printk("9p: page %lu is not flushed (%s)\n",
+                                          page->index, ctx->msg);
+#endif
+                               goto iter_send;
+                       }
+               }
+       iter_send:
+               if (iter_len == 0)
+                       /*
+                        * Nothing to send
+                        */
+                       goto next_iter;
+               ret = send_buffer(iter_page_idx << PAGE_SHIFT,
+                                 iter_len, ctx);
+               if (ret == -EAGAIN) {
+                       /* transient failure: make the pages dirty again */
+                       redirty_pages_for_writeback(pages + iter_pos,
+                                                   iter_nrpages, ctx->wbc);
+                       ret = 0;
+               } else if (ret < 0) {
+                       /*
+                        * Something bad happened.
+                        * done_index is set past this chunk,
+                        * so media errors will not choke
+                        * background writeout for the entire
+                        * file.
+                        */
+                       printk("9p: send_buffer failed (%d)\n", ret);
+
+                       set_pages_error(pages + iter_pos, iter_nrpages, ret);
+                       ctx->done_index =
+                               pages[iter_pos + iter_nrpages - 1]->index + 1;
+                       ctx->done = 1;
+                       return ret;
+               } else
+                       ret = 0;
+       next_iter:
+               if (ctx->done)
+                       return ret;
+       }
+       return 0;
+}
+
+/**
+ * Fill @ctx from the writeback request: choose the scan range
+ * [index, end] and set the cycled flag that controls wrapping a
+ * range_cyclic scan back to index 0.
+ *
+ * @ctx: flush context to initialize;
+ * @mapping: address space being written back;
+ * @wbc: writeback control describing the request;
+ * @fset: locked flush set supplying pages[] and the buffer
+ */
+static void init_flush_context(struct flush_context *ctx,
+                              struct address_space *mapping,
+                              struct writeback_control *wbc,
+                              struct v9fs_flush_set *fset)
+{
+       ctx->wbc = wbc;
+       ctx->mapping = mapping;
+       ctx->fset = fset;
+       ctx->done = 0;
+       ctx->range_whole = 0;
+
+       if (wbc->range_cyclic) {
+               /* resume where the previous writeback pass left off */
+               ctx->writeback_index = mapping->writeback_index;
+               ctx->index = ctx->writeback_index;
+               if (ctx->index == 0)
+                       ctx->cycled = 1;
+               else
+                       ctx->cycled = 0;
+               ctx->end = -1;
+       } else {
+               ctx->index = wbc->range_start >> PAGE_SHIFT;
+               ctx->end = wbc->range_end >> PAGE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       ctx->range_whole = 1;
+               ctx->cycled = 1; /* ignore range_cyclic tests */
+       }
+}
+
+/**
+ * Write back dirty pages of @mapping, coalescing runs of pages
+ * with adjacent indices into single large 9p messages.
+ *
+ * Pre-condition: flush set is locked
+ *
+ * Returns 0, or the first fatal error from flush_pages().
+ */
+static int v9fs_writepages_fastpath(struct address_space *mapping,
+                                   struct writeback_control *wbc,
+                                   struct v9fs_flush_set *fset)
+{
+       int ret = 0;
+       int tag;
+       int nr_pages;
+       struct page **pages = fset->pages;
+       struct flush_context ctx;
+
+       init_flush_context(&ctx, mapping, wbc, fset);
+
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
+retry:
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+               tag_pages_for_writeback(mapping, ctx.index, ctx.end);
+
+       ctx.done_index = ctx.index;
+
+       while (!ctx.done && (ctx.index <= ctx.end)) {
+               int i;
+               /* grab up to num_pages tagged pages in one go */
+               nr_pages = find_get_pages_tag(mapping, &ctx.index, tag,
+                                             1 + min(ctx.end - ctx.index,
+                                             (pgoff_t)(fset->num_pages - 1)),
+                                             pages);
+               if (nr_pages == 0)
+                       break;
+
+               ret = flush_pages(pages, nr_pages, &ctx);
+               /*
+                * unpin pages
+                */
+               for (i = 0; i < nr_pages; i++)
+                       put_page(pages[i]);
+               if (ret < 0)
+                       break;
+               cond_resched();
+       }
+       if (!ctx.cycled && !ctx.done) {
+               /*
+                * range_cyclic:
+                * We hit the last page and there is more work
+                * to be done: wrap back to the start of the file
+                */
+               ctx.cycled = 1;
+               ctx.index = 0;
+               ctx.end = ctx.writeback_index - 1;
+               goto retry;
+       }
+       if (wbc->range_cyclic || (ctx.range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = ctx.done_index;
+       return ret;
+}
+
+/*
+ * ->writepages() entry point: take the session's flush set if it
+ * exists and is free, and coalesce pages into large 9p messages;
+ * otherwise fall back to the generic one-page-at-a-time path.
+ */
+static int v9fs_writepages(struct address_space *mapping,
+                          struct writeback_control *wbc)
+{
+       struct v9fs_flush_set *fset;
+       int ret;
+
+       fset = v9fs_inode2v9ses(mapping->host)->flush;
+       if (fset && spin_trylock_flush_set(fset)) {
+               ret = v9fs_writepages_fastpath(mapping, wbc, fset);
+               spin_unlock_flush_set(fset);
+               return ret;
+       }
+       /*
+        * do it by slow way
+        */
+       return generic_writepages(mapping, wbc);
+}
+
 /**
  * v9fs_launder_page - Writeback a dirty page
  * Returns 0 on success.
@@ -342,6 +698,7 @@ const struct address_space_operations v9fs_addr_operations 
= {
        .readpages = v9fs_vfs_readpages,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .writepage = v9fs_vfs_writepage,
+       .writepages = v9fs_writepages,
        .write_begin = v9fs_write_begin,
        .write_end = v9fs_write_end,
        .releasepage = v9fs_release_page,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index de3ed86..c1f9af1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -140,8 +140,15 @@ static struct dentry *v9fs_mount(struct file_system_type 
*fs_type, int flags,
        }
        v9fs_fill_super(sb, v9ses, flags, data);
 
-       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+               /*
+                * alloc_init_flush_set() returns 0 or -ENOMEM and never
+                * stores an ERR_PTR in v9ses->flush, so test its int
+                * return value; an IS_ERR()/PTR_ERR(fid) check could
+                * never fire and read an unrelated variable.
+                */
+               retval = alloc_init_flush_set(v9ses);
+               if (retval)
+                       goto release_sb;
                sb->s_d_op = &v9fs_cached_dentry_operations;
-       else
+       } else
                sb->s_d_op = &v9fs_dentry_operations;
 
-- 
2.7.4


Reply via email to